Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_DistObject_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DISTOBJECT_DEF_HPP
11#define TPETRA_DISTOBJECT_DEF_HPP
12
20
21#include "Tpetra_Distributor.hpp"
24#include "Tpetra_Details_checkGlobalError.hpp"
26#include "Tpetra_Util.hpp" // Details::createPrefix
27#include "Teuchos_CommHelpers.hpp"
28#include "Teuchos_TypeNameTraits.hpp"
29#include <typeinfo>
30#include <memory>
31#include <sstream>
32
33namespace Tpetra {
34
35namespace { // (anonymous)
36template <class DeviceType, class IndexType = size_t>
37struct SumFunctor {
38 SumFunctor(const Kokkos::View<const size_t*, DeviceType>& viewToSum)
39 : viewToSum_(viewToSum) {}
40 KOKKOS_INLINE_FUNCTION void operator()(const IndexType i, size_t& lclSum) const {
41 lclSum += viewToSum_(i);
42 }
43 Kokkos::View<const size_t*, DeviceType> viewToSum_;
44};
45
46template <class DeviceType, class IndexType = size_t>
47size_t
48countTotalImportPackets(const Kokkos::View<const size_t*, DeviceType>& numImportPacketsPerLID) {
49 using Kokkos::parallel_reduce;
50 typedef DeviceType DT;
51 typedef typename DT::execution_space DES;
52 typedef Kokkos::RangePolicy<DES, IndexType> range_type;
53
54 const IndexType numOut = numImportPacketsPerLID.extent(0);
55 size_t totalImportPackets = 0;
56 parallel_reduce("Count import packets",
57 range_type(0, numOut),
58 SumFunctor<DeviceType, IndexType>(numImportPacketsPerLID),
59 totalImportPackets);
60 return totalImportPackets;
61}
62} // namespace
63
64template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
66 DistObject(const Teuchos::RCP<const map_type>& map)
67 : map_(map) {}
68
69template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
70std::string
72 description() const {
73 using Teuchos::TypeNameTraits;
74
75 std::ostringstream os;
76 os << "\"Tpetra::DistObject\": {"
77 << "Packet: " << TypeNameTraits<packet_type>::name()
78 << ", LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name()
79 << ", GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name()
80 << ", Node: " << TypeNameTraits<Node>::name();
81 if (this->getObjectLabel() != "") {
82 os << "Label: \"" << this->getObjectLabel() << "\"";
83 }
84 os << "}";
85 return os.str();
86}
87
88template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
90 describe(Teuchos::FancyOStream& out,
91 const Teuchos::EVerbosityLevel verbLevel) const {
92 using std::endl;
93 using Teuchos::rcpFromRef;
94 using Teuchos::TypeNameTraits;
95 const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
96 Teuchos::RCP<const Teuchos::Comm<int>> comm = this->getMap()->getComm();
97 const int myRank = comm.is_null() ? 0 : comm->getRank();
98 const int numProcs = comm.is_null() ? 1 : comm->getSize();
99
100 if (vl != Teuchos::VERB_NONE) {
101 Teuchos::OSTab tab0(out);
102 if (myRank == 0) {
103 out << "\"Tpetra::DistObject\":" << endl;
104 }
105 Teuchos::OSTab tab1(out);
106 if (myRank == 0) {
107 out << "Template parameters:" << endl;
108 {
109 Teuchos::OSTab tab2(out);
110 out << "Packet: " << TypeNameTraits<packet_type>::name() << endl
111 << "LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name() << endl
112 << "GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name() << endl
113 << "Node: " << TypeNameTraits<node_type>::name() << endl;
114 }
115 if (this->getObjectLabel() != "") {
116 out << "Label: \"" << this->getObjectLabel() << "\"" << endl;
117 }
118 } // if myRank == 0
119
120 // Describe the Map.
121 {
122 if (myRank == 0) {
123 out << "Map:" << endl;
124 }
125 Teuchos::OSTab tab2(out);
126 map_->describe(out, vl);
127 }
128
129 // At verbosity > VERB_LOW, each process prints something.
130 if (vl > Teuchos::VERB_LOW) {
131 for (int p = 0; p < numProcs; ++p) {
132 if (myRank == p) {
133 out << "Process " << myRank << ":" << endl;
134 Teuchos::OSTab tab2(out);
135 out << "Export buffer size (in packets): "
136 << exports_.extent(0)
137 << endl
138 << "Import buffer size (in packets): "
139 << imports_.extent(0)
140 << endl;
141 }
142 if (!comm.is_null()) {
143 comm->barrier(); // give output time to finish
144 comm->barrier();
145 comm->barrier();
146 }
147 } // for each process rank p
148 } // if vl > VERB_LOW
149 } // if vl != VERB_NONE
150}
151
152template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
154 removeEmptyProcessesInPlace(const Teuchos::RCP<const map_type>& /* newMap */) {
155 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
156 "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
157}
158
159/* These are provided in base DistObject template
160template<class DistObjectType>
161void
162removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input,
163 const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
164 typename DistObjectType::global_ordinal_type,
165 typename DistObjectType::node_type> >& newMap)
166{
167 input->removeEmptyProcessesInPlace (newMap);
168 if (newMap.is_null ()) { // my process is excluded
169 input = Teuchos::null;
170 }
171}
172
173template<class DistObjectType>
174void
175removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
176{
177 using Teuchos::RCP;
178 typedef typename DistObjectType::local_ordinal_type LO;
179 typedef typename DistObjectType::global_ordinal_type GO;
180 typedef typename DistObjectType::node_type NT;
181 typedef Map<LO, GO, NT> map_type;
182
183 RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses ();
184 removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
185}
186*/
187
188template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
192 const CombineMode CM,
193 const bool restrictedMode) {
194 using Details::Behavior;
195 using std::endl;
196 const char modeString[] = "doImport (forward mode)";
197
198 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
199 // output to std::cerr on every MPI process. This is unwise for
200 // runs with large numbers of MPI processes.
201 const bool verbose = Behavior::verbose("DistObject");
202 std::unique_ptr<std::string> prefix;
203 if (verbose) {
204 prefix = this->createPrefix("DistObject", modeString);
205 std::ostringstream os;
206 os << *prefix << "Start" << endl;
207 std::cerr << os.str();
208 }
209 this->beginImport(source, importer, CM, restrictedMode);
210 this->endImport(source, importer, CM, restrictedMode);
211 if (verbose) {
212 std::ostringstream os;
213 os << *prefix << "Done" << endl;
214 std::cerr << os.str();
215 }
216}
217
218template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
222 const CombineMode CM,
223 const bool restrictedMode) {
224 using Details::Behavior;
225 using std::endl;
226 const char modeString[] = "doExport (forward mode)";
227
228 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
229 // output to std::cerr on every MPI process. This is unwise for
230 // runs with large numbers of MPI processes.
231 const bool verbose = Behavior::verbose("DistObject");
232 std::unique_ptr<std::string> prefix;
233 if (verbose) {
234 prefix = this->createPrefix("DistObject", modeString);
235 std::ostringstream os;
236 os << *prefix << "Start" << endl;
237 std::cerr << os.str();
238 }
239 this->beginExport(source, exporter, CM, restrictedMode);
240 this->endExport(source, exporter, CM, restrictedMode);
241 if (verbose) {
242 std::ostringstream os;
243 os << *prefix << "Done" << endl;
244 std::cerr << os.str();
245 }
246}
247
248template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
252 const CombineMode CM,
253 const bool restrictedMode) {
254 using Details::Behavior;
255 using std::endl;
256 const char modeString[] = "doImport (reverse mode)";
257
258 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
259 // output to std::cerr on every MPI process. This is unwise for
260 // runs with large numbers of MPI processes.
261 const bool verbose = Behavior::verbose("DistObject");
262 std::unique_ptr<std::string> prefix;
263 if (verbose) {
264 prefix = this->createPrefix("DistObject", modeString);
265 std::ostringstream os;
266 os << *prefix << "Start" << endl;
267 std::cerr << os.str();
268 }
269 this->beginImport(source, exporter, CM, restrictedMode);
270 this->endImport(source, exporter, CM, restrictedMode);
271 if (verbose) {
272 std::ostringstream os;
273 os << *prefix << "Done" << endl;
274 std::cerr << os.str();
275 }
276}
277
278template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
282 const CombineMode CM,
283 const bool restrictedMode) {
284 using Details::Behavior;
285 using std::endl;
286 const char modeString[] = "doExport (reverse mode)";
287
288 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
289 // output to std::cerr on every MPI process. This is unwise for
290 // runs with large numbers of MPI processes.
291 const bool verbose = Behavior::verbose("DistObject");
292 std::unique_ptr<std::string> prefix;
293 if (verbose) {
294 prefix = this->createPrefix("DistObject", modeString);
295 std::ostringstream os;
296 os << *prefix << "Start" << endl;
297 std::cerr << os.str();
298 }
299 this->beginExport(source, importer, CM, restrictedMode);
300 this->endExport(source, importer, CM, restrictedMode);
301 if (verbose) {
302 std::ostringstream os;
303 os << *prefix << "Done" << endl;
304 std::cerr << os.str();
305 }
306}
307
308template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
312 const CombineMode CM,
313 const bool restrictedMode) {
314 using Details::Behavior;
315 using std::endl;
316 const char modeString[] = "beginImport (forward mode)";
317
318 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
319 // output to std::cerr on every MPI process. This is unwise for
320 // runs with large numbers of MPI processes.
321 const bool verbose = Behavior::verbose("DistObject");
322 std::unique_ptr<std::string> prefix;
323 if (verbose) {
324 prefix = this->createPrefix("DistObject", modeString);
325 std::ostringstream os;
326 os << *prefix << "Start" << endl;
327 std::cerr << os.str();
328 }
329 this->beginTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
330 if (verbose) {
331 std::ostringstream os;
332 os << *prefix << "Done" << endl;
333 std::cerr << os.str();
334 }
335}
336
337template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
338void DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
339 beginExport(const SrcDistObject& source,
340 const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
341 const CombineMode CM,
342 const bool restrictedMode) {
343 using Details::Behavior;
344 using std::endl;
345 const char modeString[] = "beginExport (forward mode)";
346
347 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
348 // output to std::cerr on every MPI process. This is unwise for
349 // runs with large numbers of MPI processes.
350 const bool verbose = Behavior::verbose("DistObject");
351 std::unique_ptr<std::string> prefix;
352 if (verbose) {
353 prefix = this->createPrefix("DistObject", modeString);
354 std::ostringstream os;
355 os << *prefix << "Start" << endl;
356 std::cerr << os.str();
357 }
358 this->beginTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
359 if (verbose) {
360 std::ostringstream os;
361 os << *prefix << "Done" << endl;
362 std::cerr << os.str();
363 }
364}
365
366template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
367void DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
368 beginImport(const SrcDistObject& source,
369 const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
370 const CombineMode CM,
371 const bool restrictedMode) {
372 using Details::Behavior;
373 using std::endl;
374 const char modeString[] = "beginImport (reverse mode)";
375
376 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
377 // output to std::cerr on every MPI process. This is unwise for
378 // runs with large numbers of MPI processes.
379 const bool verbose = Behavior::verbose("DistObject");
380 std::unique_ptr<std::string> prefix;
381 if (verbose) {
382 prefix = this->createPrefix("DistObject", modeString);
383 std::ostringstream os;
384 os << *prefix << "Start" << endl;
385 std::cerr << os.str();
386 }
387 this->beginTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
388 if (verbose) {
389 std::ostringstream os;
390 os << *prefix << "Done" << endl;
391 std::cerr << os.str();
392 }
393}
394
395template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
396void DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
397 beginExport(const SrcDistObject& source,
398 const Import<LocalOrdinal, GlobalOrdinal, Node>& importer,
399 const CombineMode CM,
400 const bool restrictedMode) {
401 using Details::Behavior;
402 using std::endl;
403 const char modeString[] = "beginExport (reverse mode)";
404
405 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
406 // output to std::cerr on every MPI process. This is unwise for
407 // runs with large numbers of MPI processes.
408 const bool verbose = Behavior::verbose("DistObject");
409 std::unique_ptr<std::string> prefix;
410 if (verbose) {
411 prefix = this->createPrefix("DistObject", modeString);
412 std::ostringstream os;
413 os << *prefix << "Start" << endl;
414 std::cerr << os.str();
415 }
416 this->beginTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
417 if (verbose) {
418 std::ostringstream os;
419 os << *prefix << "Done" << endl;
420 std::cerr << os.str();
421 }
422}
423
424template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
425void DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
426 endImport(const SrcDistObject& source,
427 const Import<LocalOrdinal, GlobalOrdinal, Node>& importer,
428 const CombineMode CM,
429 const bool restrictedMode) {
430 using Details::Behavior;
431 using std::endl;
432 const char modeString[] = "endImport (forward mode)";
433
434 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
435 // output to std::cerr on every MPI process. This is unwise for
436 // runs with large numbers of MPI processes.
437 const bool verbose = Behavior::verbose("DistObject");
438 std::unique_ptr<std::string> prefix;
439 if (verbose) {
440 prefix = this->createPrefix("DistObject", modeString);
441 std::ostringstream os;
442 os << *prefix << "Start" << endl;
443 std::cerr << os.str();
444 }
445 this->endTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
446 if (verbose) {
447 std::ostringstream os;
448 os << *prefix << "Done" << endl;
449 std::cerr << os.str();
450 }
451}
452
453template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
454void DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
455 endExport(const SrcDistObject& source,
456 const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
457 const CombineMode CM,
458 const bool restrictedMode) {
459 using Details::Behavior;
460 using std::endl;
461 const char modeString[] = "endExport (forward mode)";
462
463 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
464 // output to std::cerr on every MPI process. This is unwise for
465 // runs with large numbers of MPI processes.
466 const bool verbose = Behavior::verbose("DistObject");
467 std::unique_ptr<std::string> prefix;
468 if (verbose) {
469 prefix = this->createPrefix("DistObject", modeString);
470 std::ostringstream os;
471 os << *prefix << "Start" << endl;
472 std::cerr << os.str();
473 }
474 this->endTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
475 if (verbose) {
476 std::ostringstream os;
477 os << *prefix << "Done" << endl;
478 std::cerr << os.str();
479 }
481
482template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
486 const CombineMode CM,
487 const bool restrictedMode) {
488 using Details::Behavior;
489 using std::endl;
490 const char modeString[] = "endImport (reverse mode)";
491
492 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
493 // output to std::cerr on every MPI process. This is unwise for
494 // runs with large numbers of MPI processes.
495 const bool verbose = Behavior::verbose("DistObject");
496 std::unique_ptr<std::string> prefix;
497 if (verbose) {
498 prefix = this->createPrefix("DistObject", modeString);
499 std::ostringstream os;
500 os << *prefix << "Start" << endl;
501 std::cerr << os.str();
502 }
503 this->endTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
504 if (verbose) {
505 std::ostringstream os;
506 os << *prefix << "Done" << endl;
507 std::cerr << os.str();
508 }
509}
510
511template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
512void DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
513 endExport(const SrcDistObject& source,
514 const Import<LocalOrdinal, GlobalOrdinal, Node>& importer,
515 const CombineMode CM,
516 const bool restrictedMode) {
517 using Details::Behavior;
518 using std::endl;
519 const char modeString[] = "endExport (reverse mode)";
520
521 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
522 // output to std::cerr on every MPI process. This is unwise for
523 // runs with large numbers of MPI processes.
524 const bool verbose = Behavior::verbose("DistObject");
525 std::unique_ptr<std::string> prefix;
526 if (verbose) {
527 prefix = this->createPrefix("DistObject", modeString);
528 std::ostringstream os;
529 os << *prefix << "Start" << endl;
530 std::cerr << os.str();
531 }
532 this->endTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
533 if (verbose) {
534 std::ostringstream os;
535 os << *prefix << "Done" << endl;
536 std::cerr << os.str();
537 }
538}
539
540template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
542 transferArrived() const {
543 return distributorActor_.isReady();
544}
545
546template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
548 isDistributed() const {
549 return map_->isDistributed();
550}
551
552template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
553size_t
556 return 0; // default implementation; subclasses may override
557}
558
559template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
561 doTransfer(const SrcDistObject& src,
562 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
563 const char modeString[],
565 const CombineMode CM,
566 bool restrictedMode) {
567 beginTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
568 endTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
569}
570
571template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
574 const bool verbose,
575 const std::string* prefix,
576 const bool /*remoteLIDsContiguous*/,
577 const CombineMode /*CM*/) {
578 if (verbose) {
579 std::ostringstream os;
580 os << *prefix << "Realloc (if needed) imports_ from "
581 << imports_.extent(0) << " to " << newSize << std::endl;
582 std::cerr << os.str();
583 }
584 using ::Tpetra::Details::reallocDualViewIfNeeded;
585 const bool reallocated =
586 reallocDualViewIfNeeded(this->imports_, newSize, "imports");
587 if (verbose) {
588 std::ostringstream os;
589 os << *prefix << "Finished realloc'ing imports_" << std::endl;
590 std::cerr << os.str();
591 }
592 return reallocated;
593}
594
595template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
598 const size_t numImportLIDs) {
599 using Details::Behavior;
600 using std::endl;
601 using ::Tpetra::Details::dualViewStatusToString;
602 using ::Tpetra::Details::reallocDualViewIfNeeded;
603 // If an array is already allocated, and if it is at least
604 // tooBigFactor times bigger than it needs to be, free it and
605 // reallocate to the size we need, in order to save space.
606 // Otherwise, take subviews to reduce allocation size.
607 constexpr size_t tooBigFactor = 10;
608
609 const bool verbose = Behavior::verbose("DistObject");
610 std::unique_ptr<std::string> prefix;
611 if (verbose) {
612 prefix = this->createPrefix("DistObject",
613 "reallocArraysForNumPacketsPerLid");
614 std::ostringstream os;
615 os << *prefix
616 << "numExportLIDs: " << numExportLIDs
617 << ", numImportLIDs: " << numImportLIDs
618 << endl;
619 os << *prefix << "DualView status before:" << endl
620 << *prefix
621 << dualViewStatusToString(this->numExportPacketsPerLID_,
622 "numExportPacketsPerLID_")
623 << endl
624 << *prefix
625 << dualViewStatusToString(this->numImportPacketsPerLID_,
626 "numImportPacketsPerLID_")
627 << endl;
628 std::cerr << os.str();
629 }
630
631 // Reallocate numExportPacketsPerLID_ if needed.
632 const bool firstReallocated =
633 reallocDualViewIfNeeded(this->numExportPacketsPerLID_,
635 "numExportPacketsPerLID",
637 true); // need fence before, if realloc'ing
638
639 // If we reallocated above, then we fenced after that
640 // reallocation. This means that we don't need to fence again,
641 // before the next reallocation.
643 const bool secondReallocated =
644 reallocDualViewIfNeeded(this->numImportPacketsPerLID_,
646 "numImportPacketsPerLID",
649
650 if (verbose) {
651 std::ostringstream os;
652 os << *prefix << "DualView status after:" << endl
653 << *prefix << dualViewStatusToString(this->numExportPacketsPerLID_, "numExportPacketsPerLID_")
654 << endl
655 << *prefix << dualViewStatusToString(this->numImportPacketsPerLID_, "numImportPacketsPerLID_")
656 << endl;
657 std::cerr << os.str();
658 }
659
661}
662
663template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
666 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
667 const char modeString[],
668 const ReverseOption revOp,
669 const CombineMode CM,
671 using Details::Behavior;
674 using Kokkos::Compat::create_const_view;
675 using Kokkos::Compat::getArrayView;
676 using Kokkos::Compat::getConstArrayView;
677 using Kokkos::Compat::getKokkosViewDeepCopy;
678 using std::endl;
679 using ::Tpetra::Details::dualViewStatusToString;
680 using ::Tpetra::Details::getArrayViewFromDualView;
681
682 const bool commOnHost = !Behavior::assumeMpiIsGPUAware();
683 const char funcNameHost[] = "Tpetra::DistObject::beginTransfer[Host]";
684 const char funcNameDevice[] = "Tpetra::DistObject::beginTransfer[Device]";
686
687 ProfilingRegion region_doTransfer(funcName);
688 const bool verbose = Behavior::verbose("DistObject");
689 std::shared_ptr<std::string> prefix;
690 if (verbose) {
691 std::ostringstream os;
692 prefix = this->createPrefix("DistObject", "doTransfer");
693 os << *prefix << "Source type: " << Teuchos::typeName(src)
694 << ", Target type: " << Teuchos::typeName(*this) << endl;
695 std::cerr << os.str();
696 }
697
698 // "Restricted Mode" does two things:
699 // 1) Skips copyAndPermute
700 // 2) Allows the "target" Map of the transfer to be a subset of
701 // the Map of *this, in a "locallyFitted" sense.
702 //
703 // This cannot be used if #2 is not true, OR there are permutes.
704 // Source Maps still need to match
705
706 // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
707 // checks. These may communicate more.
708 const bool debug = Behavior::debug("DistObject");
709 if (debug) {
710 if (!restrictedMode && revOp == DoForward) {
711 const bool myMapSameAsTransferTgtMap =
712 this->getMap()->isSameAs(*(transfer.getTargetMap()));
714 "Tpetra::DistObject::" << modeString << ": For forward-mode "
715 "communication, the target DistObject's Map must be the same "
716 "(in the sense of Tpetra::Map::isSameAs) as the input "
717 "Export/Import object's target Map.");
718 } else if (!restrictedMode && revOp == DoReverse) {
719 const bool myMapSameAsTransferSrcMap =
720 this->getMap()->isSameAs(*(transfer.getSourceMap()));
722 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
723 "communication, the target DistObject's Map must be the same "
724 "(in the sense of Tpetra::Map::isSameAs) as the input "
725 "Export/Import object's source Map.");
726 } else if (restrictedMode && revOp == DoForward) {
727 const bool myMapLocallyFittedTransferTgtMap =
728 this->getMap()->isLocallyFitted(*(transfer.getTargetMap()));
729 TEUCHOS_TEST_FOR_EXCEPTION(!myMapLocallyFittedTransferTgtMap, std::invalid_argument,
730 "Tpetra::DistObject::" << modeString << ": For forward-mode "
731 "communication using restricted mode, Export/Import object's "
732 "target Map must be locally fitted (in the sense of "
733 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
734 } else { // if (restrictedMode && revOp == DoReverse)
735 const bool myMapLocallyFittedTransferSrcMap =
736 this->getMap()->isLocallyFitted(*(transfer.getSourceMap()));
737 TEUCHOS_TEST_FOR_EXCEPTION(!myMapLocallyFittedTransferSrcMap, std::invalid_argument,
738 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
739 "communication using restricted mode, Export/Import object's "
740 "source Map must be locally fitted (in the sense of "
741 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
742 }
744 // SrcDistObject need not even _have_ Maps. However, if the
745 // source object is a DistObject, it has a Map, and we may
746 // compare that Map with the Transfer's Maps.
747 const this_type* srcDistObj = dynamic_cast<const this_type*>(&src);
748 if (srcDistObj != nullptr) {
749 if (revOp == DoForward) {
750 const bool srcMapSameAsImportSrcMap =
751 srcDistObj->getMap()->isSameAs(*(transfer.getSourceMap()));
753 "Tpetra::DistObject::" << modeString << ": For forward-mode "
754 "communication, the source DistObject's Map must be the same "
755 "as the input Export/Import object's source Map.");
756 } else { // revOp == DoReverse
757 const bool srcMapSameAsImportTgtMap =
758 srcDistObj->getMap()->isSameAs(*(transfer.getTargetMap()));
760 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
761 "communication, the source DistObject's Map must be the same "
762 "as the input Export/Import object's target Map.");
763 }
764 }
765 }
766
767 const size_t numSameIDs = transfer.getNumSameIDs();
768 Distributor& distor = transfer.getDistributor();
769 const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
770
771 TEUCHOS_TEST_FOR_EXCEPTION(debug && restrictedMode &&
772 (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
773 transfer.getPermuteFromLIDs_dv().extent(0) != 0),
774 std::invalid_argument,
775 "Tpetra::DistObject::" << modeString << ": Transfer object "
776 "cannot have permutes in restricted mode.");
777
778 // Do we need all communication buffers to live on host?
779 if (verbose) {
780 std::ostringstream os;
781 os << *prefix << "doTransfer: Use new interface; "
782 "commOnHost="
783 << (commOnHost ? "true" : "false") << endl;
784 std::cerr << os.str();
785 }
786
787 using const_lo_dv_type =
788 Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
789 const_lo_dv_type permuteToLIDs = (revOp == DoForward) ? transfer.getPermuteToLIDs_dv() : transfer.getPermuteFromLIDs_dv();
790 const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ? transfer.getPermuteFromLIDs_dv() : transfer.getPermuteToLIDs_dv();
791 const_lo_dv_type remoteLIDs = (revOp == DoForward) ? transfer.getRemoteLIDs_dv() : transfer.getExportLIDs_dv();
792 const_lo_dv_type exportLIDs = (revOp == DoForward) ? transfer.getExportLIDs_dv() : transfer.getRemoteLIDs_dv();
793 const bool canTryAliasing = (revOp == DoForward) ? transfer.areRemoteLIDsContiguous() : transfer.areExportLIDsContiguous();
794 // const bool canTryAliasing = false;
795
796 ProfilingRegion region_dTN(funcName);
797
798 if (verbose) {
799 std::ostringstream os;
800 os << *prefix << "Input arguments:" << endl
801 << *prefix << " combineMode: " << combineModeToString(CM) << endl
802 << *prefix << " numSameIDs: " << numSameIDs << endl
803 << *prefix << " "
804 << dualViewStatusToString(permuteToLIDs, "permuteToLIDs") << endl
805 << *prefix << " "
806 << dualViewStatusToString(permuteFromLIDs, "permuteFromLIDs") << endl
807 << *prefix << " "
808 << dualViewStatusToString(remoteLIDs, "remoteLIDs") << endl
809 << *prefix << " "
810 << dualViewStatusToString(exportLIDs, "exportLIDs") << endl
811 << *prefix << " revOp: Do" << (revOp == DoReverse ? "Reverse" : "Forward") << endl
812 << *prefix << " commOnHost: " << (commOnHost ? "true" : "false") << endl;
813 std::cerr << os.str();
814 }
815
816 {
817 ProfilingRegion region_cs("Tpetra::DistObject::beginTransfer::checkSizes");
818 if (verbose) {
819 std::ostringstream os;
820 os << *prefix << "1. checkSizes" << endl;
821 std::cerr << os.str();
822 }
823 const bool checkSizesResult = this->checkSizes(src);
824 TEUCHOS_TEST_FOR_EXCEPTION(!checkSizesResult, std::invalid_argument,
825 "Tpetra::DistObject::doTransfer: checkSizes() indicates that the "
826 "destination object is not a legal target for redistribution from the "
827 "source object. This probably means that they do not have the same "
828 "dimensions. For example, MultiVectors must have the same number of "
829 "rows and columns.");
830 }
831
832 // The method may return zero even if the implementation actually
833 // does have a constant number of packets per LID. However, if it
834 // returns nonzero, we may use this information to avoid
835 // (re)allocating num{Ex,Im}portPacketsPerLID_. packAndPrepare()
836 // will set this to its final value.
837 //
838 // We only need this if CM != ZERO, but it has to be lifted out of
839 // that scope because there are multiple tests for CM != ZERO.
840 size_t constantNumPackets = this->constantNumberOfPackets();
841 if (verbose) {
842 std::ostringstream os;
843 os << *prefix << "constantNumPackets=" << constantNumPackets << endl;
844 std::cerr << os.str();
845 }
846
847 // Do we need to do communication?
848 bool needCommunication = true;
849 // We only need to send data if the combine mode is not ZERO.
850 if (CM != ZERO) {
851 // This may be NULL. It will be used below.
852 const this_type* srcDistObj = dynamic_cast<const this_type*>(&src);
853
854 if (revOp == DoReverse && !this->isDistributed()) {
855 needCommunication = false;
856 }
857 // FIXME (mfh 30 Jun 2013): Checking whether the source object
858 // is distributed requires a cast to DistObject. If it's not a
859 // DistObject, then I'm not quite sure what to do. Perhaps it
860 // would be more appropriate for SrcDistObject to have an
861 // isDistributed() method. For now, I'll just assume that we
862 // need to do communication unless the cast succeeds and the
863 // source is not distributed.
864 else if (revOp == DoForward && srcDistObj != NULL &&
865 !srcDistObj->isDistributed()) {
866 needCommunication = false;
867 }
868 } // if (CM != ZERO)
869 else {
870 needCommunication = false;
871 }
872
873 // The operations for the transfer can be performed in different
874 // order. The "safe" way is
875 //
876 // - copyAndPermute |
877 // - packAndPrepare |--- beginTransfer
878 // - doPostRecvs |
879 // - doPostSends |
880 //
881 // - doWaitsRecv |
882 // - unpackAndCombine |--- endTransfer
883 // - doWaitsSend |
884
885 // This is "safe" because the local computation steps
886 // copyAndPermute and packAndPrepare are free to run on host or
887 // device provided that the data is appropriately synced.
888 // Afterwards, all the communication options can run independently
889 // of the computation. This means that there are no constraints in
890 // terms of memory spaces out of which the different steps need to
891 // run.
892
893 // However, for performance it can be beneficial to overlap
894 // communication and computation, leading to this sequence of
895 // operations:
896 //
897 // - doPostRecvs |
898 // - packAndPrepare |--- beginTransfer
899 // - doPostSends |
900 // - copyAndPermute |
901 //
902 // - doWaitsRecv |
903 // - unpackAndCombine |--- endTransfer
904 // - doWaitsSend |
905 //
906 // Note that this is not the same as overlap of communication and
907 // computation in the sparse matrix-vector product which would involve
908 // performing computation between beginTransfer and endTransfer.
909 //
910 // The second approach has two advantages:
911 // 1) Receives and sends are separated by computation. This
912 // decreases the likelihood of MPI having to allocate temporary
913 // buffers for unexpectedly received messages.
914 // 2) Sends and doWaitsRecv in endTransfer are separated by
915 // copyAndPermute, giving MPI time to make progress.
916 //
917 // The downside of this approach is as follows. The imports view
918 // used for the receives is potentially aliased to a subview of
919 // the target. This means that MPI will modify the target in the
920 // memory space that is determined by GPU awareness
921 // (Behavior::assumeMpiIsGPUAware). Since copyAndPermute will also
922 // be writing to the target at the same time, it will need to
923 // modify target in the same space.
924 //
925 // Given these additional constraints, we currently only enable
926 // the overlapping of communication and computation when constantNumPackets > 0
927 // and Behavior::enableGranularTransfers().
928
929 const bool overlapTransferSteps = (constantNumPackets != 0) && Behavior::enableGranularTransfers();
930
931 if (verbose) {
932 std::ostringstream os;
933 os << *prefix << "overlapTransferSteps=" << overlapTransferSteps << endl;
934 std::cerr << os.str();
935 }
936
937 // Decide whether copyAndPermute needs to be run.
938 const bool thereAreIDsToCopy = (numSameIDs + permuteToLIDs.extent(0) != 0);
940
943 // copyAndPermute
944
945 // NOTE (mfh 26 Apr 2016) Chris Baker's implementation understood
946 // that if CM == INSERT || CM == REPLACE, the target object could
947 // be write only. We don't optimize for that here.
948
949 if (needCopyAndPermute) {
950 // There is at least one GID to copy or permute.
951
952 if (verbose) {
953 std::ostringstream os;
954 os << *prefix << "2. copyAndPermute" << endl;
955 std::cerr << os.str();
956 }
957 {
958 ProfilingRegion region_cp("Tpetra::DistObject::beginTransfer::copyAndPermute");
959
960 this->copyAndPermute(src, numSameIDs, permuteToLIDs, permuteFromLIDs, CM);
961 }
962 if (verbose) {
963 std::ostringstream os;
964 os << *prefix << "After copyAndPermute:" << endl
965 << *prefix << " "
966 << dualViewStatusToString(permuteToLIDs, "permuteToLIDs")
967 << endl
968 << *prefix << " "
969 << dualViewStatusToString(permuteFromLIDs, "permuteFromLIDs")
970 << endl;
971 std::cerr << os.str();
972 }
973 }
974
975 if (!needCommunication) {
976 if (verbose) {
977 std::ostringstream os;
978 os << *prefix << "Comm not needed; skipping" << endl;
979 std::cerr << os.str();
980 }
981 } else {
983 // packAndPrepare
984
985 if (constantNumPackets == 0) {
986 if (verbose) {
987 std::ostringstream os;
988 os << *prefix << "3. (Re)allocate num{Ex,Im}portPacketsPerLID"
989 << endl;
990 std::cerr << os.str();
991 }
992 // This only reallocates if necessary, that is, if the sizes
993 // don't match.
994 this->reallocArraysForNumPacketsPerLid(exportLIDs.extent(0),
995 remoteLIDs.extent(0));
996 }
997
998 if (verbose) {
999 std::ostringstream os;
1000 os << *prefix << "4. packAndPrepare: before, "
1001 << dualViewStatusToString(this->exports_, "exports_")
1002 << endl;
1003 std::cerr << os.str();
1005
1006 doPackAndPrepare(src, exportLIDs, constantNumPackets, execution_space());
1007 if (commOnHost) {
1008 this->exports_.sync_host();
1009 } else {
1010 this->exports_.sync_device();
1011 }
1012
1013 if (verbose) {
1014 std::ostringstream os;
1015 os << *prefix << "5.1. After packAndPrepare, "
1016 << dualViewStatusToString(this->exports_, "exports_")
1017 << endl;
1018 std::cerr << os.str();
1019 }
1020
1022 // reallocImportsIfNeeded
1023 if (constantNumPackets != 0) {
1024 ProfilingRegion region_reallocImportsIfNeeded("Tpetra::DistObject::beginTransfer::reallocImportsIfNeeded");
1025
1026 // There are a constant number of packets per element. We
1027 // already know (from the number of "remote" (incoming)
1028 // elements) how many incoming elements we expect, so we can
1029 // resize the buffer accordingly.
1030 const size_t rbufLen = remoteLIDs.extent(0) * constantNumPackets;
1031 reallocImportsIfNeeded(rbufLen, verbose, prefix.get(), canTryAliasing, CM);
1032 }
1033
1035 // doPostRecvs
1036
1037 // If only one round of communication is required: post receives.
1038 // If two rounds are required: complete first round and post receives for second round.
1039 if (verbose) {
1040 std::ostringstream os;
1041 os << *prefix << "7.0. "
1042 << (revOp == DoReverse ? "Reverse" : "Forward")
1043 << " mode" << endl;
1044 std::cerr << os.str();
1045 }
1046
1047 doPostRecvs(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
1048
1050 // doPostSends
1051
1052 doPostSends(distributorPlan, constantNumPackets, commOnHost, prefix);
1053 } // if ( needCommunication )
1054
1055 } // if ( ! overlapTransferSteps )
1056 else {
1057 // Overlap local computation with communication
1058
1059 if (!needCommunication) {
1060 if (verbose) {
1061 std::ostringstream os;
1062 os << *prefix << "Comm not needed; skipping" << endl;
1063 std::cerr << os.str();
1064 }
1065 } else {
1067 // doPostRecvs
1068
1070 // reallocImportsIfNeeded
1071 if (constantNumPackets != 0) {
1072 ProfilingRegion region_reallocImportsIfNeeded("Tpetra::DistObject::beginTransfer::reallocImportsIfNeeded");
1073
1074 // There are a constant number of packets per element. We
1075 // already know (from the number of "remote" (incoming)
1076 // elements) how many incoming elements we expect, so we can
1077 // resize the buffer accordingly.
1078 const size_t rbufLen = remoteLIDs.extent(0) * constantNumPackets;
1079 reallocImportsIfNeeded(rbufLen, verbose, prefix.get(), canTryAliasing, CM);
1080 }
1081
1082 // If only one round of communication is required: post receives.
1083 // If two rounds are required: complete first round and post receives for second round.
1084 if (verbose) {
1085 std::ostringstream os;
1086 os << *prefix << "7.0. "
1087 << (revOp == DoReverse ? "Reverse" : "Forward")
1088 << " mode" << endl;
1089 std::cerr << os.str();
1090 }
1091
1092 doPostRecvs(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
1093
1095 // packAndPrepare
1096
1097 if (constantNumPackets == 0) {
1098 if (verbose) {
1099 std::ostringstream os;
1100 os << *prefix << "3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1101 << endl;
1102 std::cerr << os.str();
1103 }
1104 // This only reallocates if necessary, that is, if the sizes
1105 // don't match.
1106 this->reallocArraysForNumPacketsPerLid(exportLIDs.extent(0),
1107 remoteLIDs.extent(0));
1108 }
1109
1110 if (verbose) {
1111 std::ostringstream os;
1112 os << *prefix << "4. packAndPrepare: before, "
1113 << dualViewStatusToString(this->exports_, "exports_")
1114 << endl;
1115 std::cerr << os.str();
1116 }
1117
1118 doPackAndPrepare(src, exportLIDs, constantNumPackets, execution_space());
1119
1120 if (commOnHost) {
1121 this->exports_.sync_host();
1122 } else {
1123 this->exports_.sync_device();
1124 }
1125
1126 if (verbose) {
1127 std::ostringstream os;
1128 os << *prefix << "5.1. After packAndPrepare, "
1129 << dualViewStatusToString(this->exports_, "exports_")
1130 << endl;
1131 std::cerr << os.str();
1132 }
1133
1135 // doPostSends
1136
1137 doPostSends(distributorPlan, constantNumPackets, commOnHost, prefix);
1138
1139 } // if ( needCommunication )
1140
1142 // copyAndPermute
1143
1144 // NOTE (mfh 26 Apr 2016) Chris Baker's implementation understood
1145 // that if CM == INSERT || CM == REPLACE, the target object could
1146 // be write only. We don't optimize for that here.
1147
1148 if (needCopyAndPermute) {
1149 // There is at least one GID to copy or permute.
1150 if (verbose) {
1151 std::ostringstream os;
1152 os << *prefix << "2. copyAndPermute" << endl;
1153 std::cerr << os.str();
1154 }
1155
1156 {
1157 ProfilingRegion region_cp("Tpetra::DistObject::beginTransfer::copyAndPermute");
1158
1159 this->copyAndPermute(src, numSameIDs, permuteToLIDs, permuteFromLIDs, CM);
1160 }
1161
1162 if (verbose) {
1163 std::ostringstream os;
1164 os << *prefix << "After copyAndPermute:" << endl
1165 << *prefix << " "
1166 << dualViewStatusToString(permuteToLIDs, "permuteToLIDs")
1167 << endl
1168 << *prefix << " "
1169 << dualViewStatusToString(permuteFromLIDs, "permuteFromLIDs")
1170 << endl;
1171 std::cerr << os.str();
1172 }
1173 }
1174 } // if ( overlapTransferSteps )
1175}
1176
1177template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1178void DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1179 endTransfer(const SrcDistObject& src,
1180 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
1181 const char modeString[],
1182 const ReverseOption revOp,
1183 const CombineMode CM,
1184 bool restrictedMode) {
1185 using Details::Behavior;
1187 using Details::ProfilingRegion;
1188 using Kokkos::Compat::create_const_view;
1189 using Kokkos::Compat::getArrayView;
1190 using Kokkos::Compat::getConstArrayView;
1191 using Kokkos::Compat::getKokkosViewDeepCopy;
1192 using std::endl;
1193 using ::Tpetra::Details::dualViewStatusToString;
1194 using ::Tpetra::Details::getArrayViewFromDualView;
1195
1196 const bool commOnHost = !Behavior::assumeMpiIsGPUAware();
1197 const char funcNameHost[] = "Tpetra::DistObject::endTransfer[Host]";
1198 const char funcNameDevice[] = "Tpetra::DistObject::endTransfer[Device]";
1199 const char* funcName = commOnHost ? funcNameHost : funcNameDevice;
1200 ProfilingRegion region_doTransfer(funcName);
1201 const bool verbose = Behavior::verbose("DistObject");
1202 std::shared_ptr<std::string> prefix;
1203 if (verbose) {
1204 std::ostringstream os;
1205 prefix = this->createPrefix("DistObject", "doTransfer");
1206 os << *prefix << "Source type: " << Teuchos::typeName(src)
1207 << ", Target type: " << Teuchos::typeName(*this) << endl;
1208 std::cerr << os.str();
1209 }
1210
1211 // "Restricted Mode" does two things:
1212 // 1) Skips copyAndPermute
1213 // 2) Allows the "target" Map of the transfer to be a subset of
1214 // the Map of *this, in a "locallyFitted" sense.
1215 //
1216 // This cannot be used if #2 is not true, OR there are permutes.
1217 // Source Maps still need to match
1218
1219 // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
1220 // checks. These may communicate more.
1221 const bool debug = Behavior::debug("DistObject");
1222 if (debug) {
1223 if (!restrictedMode && revOp == DoForward) {
1224 const bool myMapSameAsTransferTgtMap =
1225 this->getMap()->isSameAs(*(transfer.getTargetMap()));
1226 TEUCHOS_TEST_FOR_EXCEPTION(!myMapSameAsTransferTgtMap, std::invalid_argument,
1227 "Tpetra::DistObject::" << modeString << ": For forward-mode "
1228 "communication, the target DistObject's Map must be the same "
1229 "(in the sense of Tpetra::Map::isSameAs) as the input "
1230 "Export/Import object's target Map.");
1231 } else if (!restrictedMode && revOp == DoReverse) {
1232 const bool myMapSameAsTransferSrcMap =
1233 this->getMap()->isSameAs(*(transfer.getSourceMap()));
1234 TEUCHOS_TEST_FOR_EXCEPTION(!myMapSameAsTransferSrcMap, std::invalid_argument,
1235 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1236 "communication, the target DistObject's Map must be the same "
1237 "(in the sense of Tpetra::Map::isSameAs) as the input "
1238 "Export/Import object's source Map.");
1239 } else if (restrictedMode && revOp == DoForward) {
1240 const bool myMapLocallyFittedTransferTgtMap =
1241 this->getMap()->isLocallyFitted(*(transfer.getTargetMap()));
1242 TEUCHOS_TEST_FOR_EXCEPTION(!myMapLocallyFittedTransferTgtMap, std::invalid_argument,
1243 "Tpetra::DistObject::" << modeString << ": For forward-mode "
1244 "communication using restricted mode, Export/Import object's "
1245 "target Map must be locally fitted (in the sense of "
1246 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1247 } else { // if (restrictedMode && revOp == DoReverse)
1248 const bool myMapLocallyFittedTransferSrcMap =
1249 this->getMap()->isLocallyFitted(*(transfer.getSourceMap()));
1250 TEUCHOS_TEST_FOR_EXCEPTION(!myMapLocallyFittedTransferSrcMap, std::invalid_argument,
1251 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1252 "communication using restricted mode, Export/Import object's "
1253 "source Map must be locally fitted (in the sense of "
1254 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1255 }
1256
1257 // SrcDistObject need not even _have_ Maps. However, if the
1258 // source object is a DistObject, it has a Map, and we may
1259 // compare that Map with the Transfer's Maps.
1260 const this_type* srcDistObj = dynamic_cast<const this_type*>(&src);
1261 if (srcDistObj != nullptr) {
1262 if (revOp == DoForward) {
1263 const bool srcMapSameAsImportSrcMap =
1264 srcDistObj->getMap()->isSameAs(*(transfer.getSourceMap()));
1265 TEUCHOS_TEST_FOR_EXCEPTION(!srcMapSameAsImportSrcMap, std::invalid_argument,
1266 "Tpetra::DistObject::" << modeString << ": For forward-mode "
1267 "communication, the source DistObject's Map must be the same "
1268 "as the input Export/Import object's source Map.");
1269 } else { // revOp == DoReverse
1270 const bool srcMapSameAsImportTgtMap =
1271 srcDistObj->getMap()->isSameAs(*(transfer.getTargetMap()));
1272 TEUCHOS_TEST_FOR_EXCEPTION(!srcMapSameAsImportTgtMap, std::invalid_argument,
1273 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1274 "communication, the source DistObject's Map must be the same "
1275 "as the input Export/Import object's target Map.");
1276 }
1277 }
1278 }
1279
1280 Distributor& distor = transfer.getDistributor();
1281 const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
1282
1283 TEUCHOS_TEST_FOR_EXCEPTION(debug && restrictedMode &&
1284 (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
1285 transfer.getPermuteFromLIDs_dv().extent(0) != 0),
1286 std::invalid_argument,
1287 "Tpetra::DistObject::" << modeString << ": Transfer object "
1288 "cannot have permutes in restricted mode.");
1289
1290 // Do we need all communication buffers to live on host?
1291 if (verbose) {
1292 std::ostringstream os;
1293 os << *prefix << "doTransfer: Use new interface; "
1294 "commOnHost="
1295 << (commOnHost ? "true" : "false") << endl;
1296 std::cerr << os.str();
1297 }
1298
1299 using const_lo_dv_type =
1300 Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
1301 const_lo_dv_type remoteLIDs = (revOp == DoForward) ? transfer.getRemoteLIDs_dv() : transfer.getExportLIDs_dv();
1302
1303 size_t constantNumPackets = this->constantNumberOfPackets();
1304
1305 // We only need to send data if the combine mode is not ZERO.
1306 if (CM != ZERO) {
1307 // Do we need to do communication (via doWaitsRecv and doWaitsSend)?
1308 bool needCommunication = true;
1309
1310 // This may be NULL. It will be used below.
1311 const this_type* srcDistObj = dynamic_cast<const this_type*>(&src);
1312
1313 if (revOp == DoReverse && !this->isDistributed()) {
1314 needCommunication = false;
1315 }
1316 // FIXME (mfh 30 Jun 2013): Checking whether the source object
1317 // is distributed requires a cast to DistObject. If it's not a
1318 // DistObject, then I'm not quite sure what to do. Perhaps it
1319 // would be more appropriate for SrcDistObject to have an
1320 // isDistributed() method. For now, I'll just assume that we
1321 // need to do communication unless the cast succeeds and the
1322 // source is not distributed.
1323 else if (revOp == DoForward && srcDistObj != NULL &&
1324 !srcDistObj->isDistributed()) {
1325 needCommunication = false;
1326 }
1327
1328 if (!needCommunication) {
1329 if (verbose) {
1330 std::ostringstream os;
1331 os << *prefix << "Comm not needed; skipping" << endl;
1332 std::cerr << os.str();
1333 }
1334 } else {
1335 distributorActor_.doWaitsRecv(distributorPlan);
1336
1337 if (verbose) {
1338 std::ostringstream os;
1339 os << *prefix << "8. unpackAndCombine - remoteLIDs " << remoteLIDs.extent(0) << ", constantNumPackets " << constantNumPackets << endl;
1340 std::cerr << os.str();
1341 }
1342 doUnpackAndCombine(remoteLIDs, constantNumPackets, CM, execution_space());
1343
1344 distributorActor_.doWaitsSend(distributorPlan);
1345 } // if (needCommunication)
1346 } // if (CM != ZERO)
1347
1348 if (verbose) {
1349 std::ostringstream os;
1350 os << *prefix << "9. Done!" << endl;
1351 std::cerr << os.str();
1352 }
1353
1354 if (verbose) {
1355 std::ostringstream os;
1356 os << *prefix << "Tpetra::DistObject::doTransfer: Done!" << endl;
1357 std::cerr << os.str();
1358 }
1359}
1360
1361template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1362void DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1363 doPostRecvs(const Details::DistributorPlan& distributorPlan,
1364 size_t constantNumPackets,
1365 bool commOnHost,
1366 std::shared_ptr<std::string> prefix,
1367 const bool canTryAliasing,
1368 const CombineMode CM) {
1369 using Details::ProfilingRegion;
1370 using Kokkos::Compat::create_const_view;
1371 using std::endl;
1372 using ::Tpetra::Details::dualViewStatusToString;
1373 using ::Tpetra::Details::getArrayViewFromDualView;
1374
1375 const char funcNameHost[] = "Tpetra::DistObject::doPostRecvs[Host]";
1376 const char funcNameDevice[] = "Tpetra::DistObject::doPostRecvs[Device]";
1377 const char* funcName = commOnHost ? funcNameHost : funcNameDevice;
1378 ProfilingRegion region_dpr(funcName);
1379
1380 const bool verbose = Details::Behavior::verbose("DistObject");
1381
1382 if (constantNumPackets == 0) { // variable num packets per LID
1383 if (verbose) {
1384 std::ostringstream os;
1385 os << *prefix << "7.1. Variable # packets / LID: first comm "
1386 << "(commOnHost = " << (commOnHost ? "true" : "false") << ")"
1387 << endl;
1388 std::cerr << os.str();
1389 }
1390 size_t totalImportPackets = 0;
1391 if (commOnHost) {
1392 if (this->numExportPacketsPerLID_.need_sync_host()) {
1393 this->numExportPacketsPerLID_.sync_host();
1394 }
1395 if (this->numImportPacketsPerLID_.need_sync_host()) {
1396 this->numImportPacketsPerLID_.sync_host();
1397 }
1398 this->numImportPacketsPerLID_.modify_host(); // out arg
1399 auto numExp_h =
1400 create_const_view(this->numExportPacketsPerLID_.view_host());
1401 auto numImp_h = this->numImportPacketsPerLID_.view_host();
1402
1403 // MPI communication happens here.
1404 if (verbose) {
1405 std::ostringstream os;
1406 os << *prefix << "Call doPostsAndWaits"
1407 << endl;
1408 std::cerr << os.str();
1409 }
1410 distributorActor_.doPostsAndWaits(distributorPlan, numExp_h, 1, numImp_h);
1411
1412 if (verbose) {
1413 std::ostringstream os;
1414 os << *prefix << "Count totalImportPackets" << std::endl;
1415 std::cerr << os.str();
1416 }
1417 using the_dev_type = typename decltype(numImp_h)::device_type;
1418 totalImportPackets = countTotalImportPackets<the_dev_type>(numImp_h);
1419 } else { // ! commOnHost
1420 this->numExportPacketsPerLID_.sync_device();
1421 this->numImportPacketsPerLID_.sync_device();
1422 this->numImportPacketsPerLID_.modify_device(); // out arg
1423 auto numExp_d = create_const_view(this->numExportPacketsPerLID_.view_device());
1424 auto numImp_d = this->numImportPacketsPerLID_.view_device();
1425
1426 // MPI communication happens here.
1427 if (verbose) {
1428 std::ostringstream os;
1429 os << *prefix << "Call doPostsAndWaits"
1430 << endl;
1431 std::cerr << os.str();
1432 }
1433
1434 distributorActor_.doPostsAndWaits(distributorPlan, numExp_d, 1, numImp_d);
1435
1436 if (verbose) {
1437 std::ostringstream os;
1438 os << *prefix << "Count totalImportPackets" << std::endl;
1439 std::cerr << os.str();
1440 }
1441 using the_dev_type = typename decltype(numImp_d)::device_type;
1442 totalImportPackets = countTotalImportPackets<the_dev_type>(numImp_d);
1443 }
1444
1445 if (verbose) {
1446 std::ostringstream os;
1447 os << *prefix << "totalImportPackets=" << totalImportPackets << endl;
1448 std::cerr << os.str();
1449 }
1450 this->reallocImportsIfNeeded(totalImportPackets, verbose,
1451 prefix.get(), canTryAliasing, CM);
1452 if (verbose) {
1453 std::ostringstream os;
1454 os << *prefix << "7.3. Second comm" << std::endl;
1455 std::cerr << os.str();
1456 }
1457
1458 // mfh 04 Feb 2019: Distributor expects the "num packets per
1459 // LID" arrays on host, so that it can issue MPI sends and
1460 // receives correctly.
1461 this->numImportPacketsPerLID_.sync_host();
1462
1463 // NOTE (mfh 25 Apr 2016, 01 Aug 2017) doPostsAndWaits and
1464 // doReversePostsAndWaits currently want
1465 // numExportPacketsPerLID and numImportPacketsPerLID as
1466 // Teuchos::ArrayView, rather than as Kokkos::View.
1467 //
1468 // NOTE (mfh 04 Feb 2019) This does NOT copy from host to
1469 // device. The above syncs might.
1470 auto numImportPacketsPerLID_av =
1471 getArrayViewFromDualView(this->numImportPacketsPerLID_);
1472
1473 // imports_ is for output only, so we don't need to sync it
1474 // before marking it as modified. However, in order to
1475 // prevent spurious debug-mode errors (e.g., "modified on
1476 // both device and host"), we first need to clear its
1477 // "modified" flags.
1478 this->imports_.clear_sync_state();
1479
1480 if (verbose) {
1481 std::ostringstream os;
1482 os << *prefix << "Comm on "
1483 << (commOnHost ? "host" : "device")
1484 << "; call doPostRecvs" << endl;
1485 std::cerr << os.str();
1486 }
1487
1488 if (commOnHost) {
1489 this->imports_.modify_host();
1490 distributorActor_.doPostRecvs(distributorPlan,
1491 this->imports_.view_host(),
1492 numImportPacketsPerLID_av);
1493 } else { // pack on device
1494 this->imports_.modify_device();
1495 distributorActor_.doPostRecvs(distributorPlan,
1496 this->imports_.view_device(),
1497 numImportPacketsPerLID_av);
1498 }
1499 } else { // constant number of packets per LID
1500 if (verbose) {
1501 std::ostringstream os;
1502 os << *prefix << "7.1. Const # packets per LID: " << endl
1503 << *prefix << " "
1504 << dualViewStatusToString(this->exports_, "exports_")
1505 << endl
1506 << *prefix << " "
1507 << dualViewStatusToString(this->exports_, "imports_")
1508 << endl;
1509 std::cerr << os.str();
1510 }
1511 // imports_ is for output only, so we don't need to sync it
1512 // before marking it as modified. However, in order to
1513 // prevent spurious debug-mode errors (e.g., "modified on
1514 // both device and host"), we first need to clear its
1515 // "modified" flags.
1516 this->imports_.clear_sync_state();
1517
1518 if (verbose) {
1519 std::ostringstream os;
1520 os << *prefix << "7.2. Comm on "
1521 << (commOnHost ? "host" : "device")
1522 << "; call doPostRecvs" << endl;
1523 std::cerr << os.str();
1524 }
1525 if (commOnHost) {
1526 this->imports_.modify_host();
1527 distributorActor_.doPostRecvs(distributorPlan,
1528 constantNumPackets,
1529 this->imports_.view_host());
1530 } else { // pack on device
1531 this->imports_.modify_device();
1532 distributorActor_.doPostRecvs(distributorPlan,
1533 constantNumPackets,
1534 this->imports_.view_device());
1535 } // commOnHost
1536 } // constant or variable num packets per LID
1537}
1538
1539template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1540void DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1541 doPostSends(const Details::DistributorPlan& distributorPlan,
1542 size_t constantNumPackets,
1543 bool commOnHost,
1544 std::shared_ptr<std::string> prefix) {
1545 using Details::ProfilingRegion;
1546 using Kokkos::Compat::create_const_view;
1547 using std::endl;
1548 using ::Tpetra::Details::getArrayViewFromDualView;
1549
1550 const char funcNameHost[] = "Tpetra::DistObject::doPostSends[Host]";
1551 const char funcNameDevice[] = "Tpetra::DistObject::doPostSends[Device]";
1552 const char* funcName = commOnHost ? funcNameHost : funcNameDevice;
1553 ProfilingRegion region_dps(funcName);
1554
1555 const bool verbose = Details::Behavior::verbose("DistObject");
1556 if (verbose) {
1557 std::ostringstream os;
1558 os << *prefix << "Comm on "
1559 << (commOnHost ? "host" : "device")
1560 << "; call doPostSends" << endl;
1561 std::cerr << os.str();
1562 }
1563
1564 if (constantNumPackets == 0) { // variable num packets per LID
1565 // mfh 04 Feb 2019: Distributor expects the "num packets per
1566 // LID" arrays on host, so that it can issue MPI sends and
1567 // receives correctly.
1568 this->numExportPacketsPerLID_.sync_host();
1569 this->numImportPacketsPerLID_.sync_host();
1570
1571 // NOTE (mfh 25 Apr 2016, 01 Aug 2017) doPostsAndWaits and
1572 // doReversePostsAndWaits currently want
1573 // numExportPacketsPerLID and numImportPacketsPerLID as
1574 // Teuchos::ArrayView, rather than as Kokkos::View.
1575 //
1576 // NOTE (mfh 04 Feb 2019) This does NOT copy from host to
1577 // device. The above syncs might.
1578 auto numExportPacketsPerLID_av =
1579 getArrayViewFromDualView(this->numExportPacketsPerLID_);
1580 auto numImportPacketsPerLID_av =
1581 getArrayViewFromDualView(this->numImportPacketsPerLID_);
1582
1583 if (commOnHost) {
1584 distributorActor_.doPostSends(distributorPlan,
1585 create_const_view(this->exports_.view_host()),
1586 numExportPacketsPerLID_av,
1587 this->imports_.view_host(),
1588 numImportPacketsPerLID_av);
1589 } else { // pack on device
1590 // We need to guarantee that packAndPrepare is done before we send.
1591 Kokkos::fence("DistObject::doPostSends-1"); // for UVM
1592 distributorActor_.doPostSends(distributorPlan,
1593 create_const_view(this->exports_.view_device()),
1594 numExportPacketsPerLID_av,
1595 this->imports_.view_device(),
1596 numImportPacketsPerLID_av);
1597 }
1598 } else { // constant number of packets per LID
1599 if (commOnHost) {
1600 distributorActor_.doPostSends(distributorPlan,
1601 create_const_view(this->exports_.view_host()),
1602 constantNumPackets,
1603 this->imports_.view_host());
1604 } else { // pack on device
1605 // We need to guarantee that packAndPrepare is done before we send.
1606 Kokkos::fence("DistObject::doPostSends-2"); // for UVM
1607 distributorActor_.doPostSends(distributorPlan,
1608 create_const_view(this->exports_.view_device()),
1609 constantNumPackets,
1610 this->imports_.view_device());
1611 } // commOnHost
1612 } // constant or variable num packets per LID
1613}
1614
1615template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1616void DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1617 doPackAndPrepare(const SrcDistObject& src,
1618 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1619 size_t& constantNumPackets,
1620 const execution_space& space) {
1621 using Details::ProfilingRegion;
1622 using std::endl;
1623 const bool debug = Details::Behavior::debug("DistObject");
1624
1625 ProfilingRegion region_pp("Tpetra::DistObject::doPackAndPrepare");
1626
1627 // Ask the source to pack data. Also ask it whether there are
1628 // a constant number of packets per element
1629 // (constantNumPackets is an output argument). If there are,
1630 // constantNumPackets will come back nonzero. Otherwise, the
1631 // source will fill the numExportPacketsPerLID_ array.
1632
1633 // FIXME (mfh 18 Oct 2017) if (! commOnHost), sync to device?
1634 // Alternately, make packAndPrepare take a "commOnHost"
1635 // argument to tell it where to leave the data?
1636 //
1637 // NOTE (mfh 04 Feb 2019) Subclasses of DistObject should have
1638 // the freedom to pack and unpack either on host or device.
1639 // We should prefer sync'ing only on demand. Thus, we can
1640 // answer the above question: packAndPrepare should not
1641 // take a commOnHost argument, and doTransferNew should sync
1642 // where needed, if needed.
1643 if (debug) {
1644 std::ostringstream lclErrStrm;
1645 bool lclSuccess = false;
1646 try {
1647 this->packAndPrepare(src, exportLIDs, this->exports_,
1648 this->numExportPacketsPerLID_,
1649 constantNumPackets, space);
1650 lclSuccess = true;
1651 } catch (std::exception& e) {
1652 lclErrStrm << "packAndPrepare threw an exception: "
1653 << endl
1654 << e.what();
1655 } catch (...) {
1656 lclErrStrm << "packAndPrepare threw an exception "
1657 "not a subclass of std::exception.";
1658 }
1659 const char gblErrMsgHeader[] =
1660 "Tpetra::DistObject "
1661 "threw an exception in packAndPrepare on "
1662 "one or more processes in the DistObject's communicator.";
1663 auto comm = getMap()->getComm();
1664 Details::checkGlobalError(std::cerr, lclSuccess,
1665 lclErrStrm.str().c_str(),
1666 gblErrMsgHeader, *comm);
1667 } else {
1668 this->packAndPrepare(src, exportLIDs, this->exports_,
1669 this->numExportPacketsPerLID_,
1670 constantNumPackets, space);
1671 }
1672}
1673
1674template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1675void DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1676 doUnpackAndCombine(const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& remoteLIDs,
1677 size_t constantNumPackets,
1678 CombineMode CM,
1679 const execution_space& space) {
1680 using Details::ProfilingRegion;
1681 using std::endl;
1682 const bool debug = Details::Behavior::debug("DistObject");
1683
1684 ProfilingRegion region_uc("Tpetra::DistObject::doUnpackAndCombine");
1685
1686 if (debug) {
1687 std::ostringstream lclErrStrm;
1688 bool lclSuccess = false;
1689 try {
1690 this->unpackAndCombine(remoteLIDs, this->imports_,
1691 this->numImportPacketsPerLID_,
1692 constantNumPackets, CM, space);
1693 lclSuccess = true;
1694 } catch (std::exception& e) {
1695 lclErrStrm << "doUnpackAndCombine threw an exception: "
1696 << endl
1697 << e.what();
1698 } catch (...) {
1699 lclErrStrm << "doUnpackAndCombine threw an exception "
1700 "not a subclass of std::exception.";
1701 }
1702 const char gblErrMsgHeader[] =
1703 "Tpetra::DistObject "
1704 "threw an exception in unpackAndCombine on "
1705 "one or more processes in the DistObject's communicator.";
1706 auto comm = getMap()->getComm();
1707 Details::checkGlobalError(std::cerr, lclSuccess,
1708 lclErrStrm.str().c_str(),
1709 gblErrMsgHeader, *comm);
1710 } else {
1711 this->unpackAndCombine(remoteLIDs, this->imports_,
1712 this->numImportPacketsPerLID_,
1713 constantNumPackets, CM, space);
1714 }
1715}
1716
1717template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1720 const size_t,
1721 const Kokkos::DualView<
1722 const local_ordinal_type*,
1724 const Kokkos::DualView<
1725 const local_ordinal_type*,
1727 const CombineMode CM) {}
1728
// Execution-space overload that forwards to the five-argument
// copyAndPermute.  It fences `space`, calls copyAndPermute without an
// execution-space argument, and then fences a default-constructed
// execution_space.
//
// Parameters:
//   source, numSameIDs, permuteToLIDs, permuteFromLIDs, CM
//            -- forwarded unchanged to the five-argument copyAndPermute.
//   space  -- execution space instance that is fenced before the
//             forwarded call.
//
// NOTE(review): the line carrying the return type and qualified function
// name (listing line 1730) is not present in this extracted listing.
1729template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1731 const SrcDistObject& source, const size_t numSameIDs,
1732 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
1733 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
1734 const CombineMode CM, const execution_space& space) {
1735 /*
1736 This is called if the derived class doesn't know how to copy and permute in
1737 an arbitrary execution space instance, but it was asked to anyway.
1738 Provide a safe illusion by actually doing the work in the default instance,
1739 and syncing the default instance with the provided instance.
1740 The caller expects
1741 1. any work in the provided instance to complete before this.
1742 2. This to complete before any following work in the provided instance.
1743 */
1744
// Wait for any work already submitted to the provided instance.
1745 space.fence("Tpetra::DistObject::copyAndPermute-1"); // TODO: Tpetra::Details::Spaces::exec_space_wait
1746 copyAndPermute(source, numSameIDs, permuteToLIDs, permuteFromLIDs,
1747 CM); // default instance
// Wait for the default instance before returning to the caller.
1748 execution_space().fence("Tpetra::DistObject::copyAndPermute-2"); // TODO:
1749 // Tpetra::Details::Spaces::exec_space_wait
1750}
1751
// Out-of-class definition of a DistObject member whose body is empty: it
// accepts its arguments and does nothing.  All visible parameters are
// unnamed because they are unused.
//
// NOTE(review): this is an extracted listing.  The lines that carry the
// return type and qualified function name (listing lines 1753-1754) and the
// closing DualView template arguments (1757, 1760, 1763) are not present
// here.  The parameter list (DualView of const local_ordinal_type*,
// DualView of packet_type*, DualView of size_t*, size_t&) looks like the
// packAndPrepare overload without an execution space -- confirm against
// the original header.
1752template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1755 const Kokkos::DualView<
1756 const local_ordinal_type*,
1758 Kokkos::DualView<
1759 packet_type*,
1761 Kokkos::DualView<
1762 size_t*,
1764 size_t&) {}
1765
// Execution-space overload that forwards to the five-argument
// packAndPrepare.  It fences `space`, calls packAndPrepare without an
// execution-space argument, and then fences a default-constructed
// execution_space.
//
// Parameters:
//   source, exportLIDs, exports, numPacketsPerLID, constantNumPackets
//            -- forwarded unchanged to the five-argument packAndPrepare
//               (exports and constantNumPackets are non-const references,
//               so the forwarded call may modify them).
//   space  -- execution space instance that is fenced before the
//             forwarded call.
//
// NOTE(review): the line carrying the return type and qualified function
// name (listing line 1767) is not present in this extracted listing.
1766template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1768 const SrcDistObject& source,
1769 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1770 Kokkos::DualView<packet_type*, buffer_device_type>& exports,
1771 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
1772 size_t& constantNumPackets, const execution_space& space) {
1773 /*
1774 This is called if the derived class doesn't know how to pack and prepare in
1775 an arbitrary execution space instance, but it was asked to anyway.
1776 Provide a safe illusion by actually doing the work in the default instance,
1777 and syncing the default instance with the provided instance.
1778
1779 The caller expects
1780 1. any work in the provided instance to complete before this.
1781 2. This to complete before any following work in the provided instance.
1782 */
1783
1784 // wait for any work from prior operations in the provided instance to
1785 // complete
1786 space.fence("Tpetra::DistObject::packAndPrepare-1"); // TODO: Details::Spaces::exec_space_wait
1787
1788 // pack and prepare in the default instance.
1789 packAndPrepare(source, exportLIDs, exports, numPacketsPerLID,
1790 constantNumPackets); // default instance
1791
1792 // wait for the default instance to complete before returning, so any
1793 // following work inserted into the provided instance will be done after this
1794 execution_space().fence("Tpetra::DistObject::packAndPrepare-2"); // TODO: Details::Spaces::exec_space_wait
1795}
1796
// unpackAndCombine overload without an execution-space argument.  The body
// is empty, so this version does nothing with its arguments; every
// parameter name is commented out because it is unused.
//
// NOTE(review): the line carrying the return type and class qualifier
// (listing line 1798) is not present in this extracted listing.
1797template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1799 unpackAndCombine(const Kokkos::DualView<
1800 const local_ordinal_type*,
1801 buffer_device_type>& /* importLIDs */,
1802 Kokkos::DualView<
1803 packet_type*,
1804 buffer_device_type> /* imports */,
1805 Kokkos::DualView<
1806 size_t*,
1807 buffer_device_type> /* numPacketsPerLID */,
1808 const size_t /* constantNumPackets */,
1809 const CombineMode /* combineMode */) {}
1810
// Execution-space overload that forwards to the five-argument
// unpackAndCombine.  It fences `space`, calls unpackAndCombine without an
// execution-space argument, and then fences a default-constructed
// execution_space.
//
// Parameters:
//   importLIDs, imports, numPacketsPerLID, constantNumPackets, combineMode
//            -- forwarded unchanged to the five-argument unpackAndCombine.
//   space  -- execution space instance that is fenced before the
//             forwarded call.
//
// NOTE(review): the line carrying the return type and qualified function
// name (listing line 1812) is not present in this extracted listing.
1811template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1813 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
1814 Kokkos::DualView<packet_type*, buffer_device_type> imports,
1815 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
1816 const size_t constantNumPackets, const CombineMode combineMode,
1817 const execution_space& space) {
1818 // Wait for any work in the provided space to complete
1819 space.fence("Tpetra::DistObject::unpackAndCombine-1"); // TODO: Details::Spaces::exec_space_wait(execution_space(),
1820 // space);
1821 unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets,
1822 combineMode); // default instance
1823 // wait for unpack to finish in the default instance, since the caller
1824 // may be expecting sequential semantics in the `space` instance
1825 execution_space().fence("Tpetra::DistObject::unpackAndCombine-2"); // TODO: Details::Spaces::exec_space_wait(space,
1826 // execution_space());
1827}
1828
// Const member taking an output stream `os`; it calls this->describe with
// verbosity Teuchos::VERB_DEFAULT on the stream object `out`.
//
// NOTE(review): this is an extracted listing.  The line carrying the return
// type and qualified function name (listing line 1830) and the statement
// that defines `out` (listing line 1838) are not present here.  Given the
// using-declarations below, `out` is presumably a FancyOStream wrapping
// `os` -- confirm against the original header.
1829template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1831 std::ostream& os) const {
1832 using std::endl;
1833 using Teuchos::FancyOStream;
1834 using Teuchos::getFancyOStream;
1835 using Teuchos::RCP;
1836 using Teuchos::rcpFromRef;
1837
1839 this->describe(*out, Teuchos::VERB_DEFAULT);
1840}
1841
// Returns the string produced by Details::createPrefix for the given class
// and method names, using this object's communicator.
//
// Parameters:
//   className  -- passed through to Details::createPrefix.
//   methodName -- passed through to Details::createPrefix.
// Returns: the std::unique_ptr<std::string> returned by
//   Details::createPrefix.
//
// NOTE(review): the line carrying the qualified function name (listing
// line 1844) is not present in this extracted listing.
1842template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1843std::unique_ptr<std::string>
1845 const char className[], const char methodName[]) const {
1846 auto map = this->getMap();
// A null Map yields a null communicator, so the raw pointer passed below
// may be null.
1847 auto comm = map.is_null() ? Teuchos::null : map->getComm();
1848 return Details::createPrefix(comm.getRawPtr(), className, methodName);
1849}
1850
// Nonmember helper: calls input->removeEmptyProcessesInPlace(newMap) and
// then, if newMap is null, resets `input` to Teuchos::null.
//
// Parameters:
//   input  -- in/out; dereferenced unconditionally, so it must be non-null
//             on entry.  Set to null on exit when newMap is null.
//   newMap -- Map handed to the object's removeEmptyProcessesInPlace
//             method; a null value marks the calling process as excluded.
//
// NOTE(review): the line carrying the return type and function name
// (listing line 1852) is not present in this extracted listing.
1851template <class DistObjectType>
1853 Teuchos::RCP<DistObjectType>& input,
1854 const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
1855 typename DistObjectType::global_ordinal_type,
1856 typename DistObjectType::node_type>>& newMap) {
1857 input->removeEmptyProcessesInPlace(newMap);
1858 if (newMap.is_null()) { // my process is excluded
1859 input = Teuchos::null;
1860 }
1861}
1862
// Single-argument overload: obtains a new Map by calling
// removeEmptyProcesses() on the input object's current Map.
// `input` is dereferenced unconditionally, so it must be non-null on entry.
//
// NOTE(review): the statement that follows the newMap computation (listing
// line 1866) is not present in this extracted listing; presumably it
// forwards `input` and `newMap` to the two-argument overload -- confirm
// against the original header.
1863template <class DistObjectType>
1864void removeEmptyProcessesInPlace(Teuchos::RCP<DistObjectType>& input) {
1865 auto newMap = input->getMap()->removeEmptyProcesses();
1867}
1868
1869// Explicit instantiation macro for general DistObject.
// Expands to `template class DistObject<SCALAR, LO, GO, NODE>;`.  The
// expansion already ends in a semicolon.
1870#define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \
1871 template class DistObject<SCALAR, LO, GO, NODE>;
1872
1873// Explicit instantiation macro for DistObject<char, ...>.
1874// The "SLGN" stuff above doesn't work for Packet=char.
// Expands to `template class DistObject<char, LO, GO, NODE>;`, i.e. the
// Packet type is fixed to char and only LO, GO and NODE are parameters.
// NOTE(review): "SLGN" presumably refers to the (SCALAR, LO, GO, NODE)
// instantiation macro -- confirm.
1875#define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
1876 template class DistObject<char, LO, GO, NODE>;
1877
1878} // namespace Tpetra
1879
1880#endif // TPETRA_DISTOBJECT_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
void unpackAndCombine(const RowView &row_ptrs_beg, const RowView &row_ptrs_end, IndicesView &indices, const Kokkos::View< const GlobalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &imports, const Kokkos::View< const size_t *, BufferDevice, Kokkos::MemoryUnmanaged > &num_packets_per_lid, const Kokkos::View< const LocalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &import_lids, const typename CrsGraph< LocalOrdinal, GlobalOrdinal, Node >::padding_type &padding, const bool unpack_pids, const int myRank, const bool verbose)
Perform the unpack operation for the graph.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
Description of Tpetra's behavior.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
Base class for distributed Tpetra objects that support data redistribution.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print a description of this object to the given output stream.
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool remoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
virtual bool reallocArraysForNumPacketsPerLid(const size_t numExportLIDs, const size_t numImportLIDs)
Reallocate numExportPacketsPerLID_ and/or numImportPacketsPerLID_, if necessary.
void doImport(const SrcDistObject &source, const Import< LocalOrdinal, GlobalOrdinal, Node > &importer, const CombineMode CM, const bool restrictedMode=false)
Import data into this object using an Import object ("forward mode").
void beginTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Implementation detail of doTransfer.
DistObject(const Teuchos::RCP< const map_type > &map)
Constructor.
bool transferArrived() const
Whether the data from an import/export operation has arrived, and is ready for the unpack and combine...
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets)
Pack data and metadata for communication (sends).
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
LocalOrdinal local_ordinal_type
The type of local indices.
typename ::Kokkos::ArithTraits< Packet >::val_type packet_type
The type of each datum being sent or received in an Import or Export.
virtual void doTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Redistribute data across (MPI) processes.
typename device_type::execution_space execution_space
The Kokkos execution space.
void print(std::ostream &os) const
Print this object to the given output stream.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode)
Perform any unpacking and combining after communication.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM)
Perform copies and permutations that are local to the calling (MPI) process.
ReverseOption
Whether the data transfer should be performed in forward or reverse mode.
virtual size_t constantNumberOfPackets() const
Whether the implementation's instance promises always to have a constant number of packets per LID (l...
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
virtual std::string description() const
One-line description of this object.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes which contain no entries in this object's Map.
bool isDistributed() const
Whether this is a globally distributed object.
A parallel distribution of indices over processes.
Abstract base class for objects that can be the source of an Import or Export operation.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
CombineMode
Rule for combining data in an Import or Export.
@ ZERO
Replace old values with zero.