Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Export_def.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_EXPORT_DEF_HPP
11#define TPETRA_EXPORT_DEF_HPP
12
13#include "Tpetra_Distributor.hpp"
14#include "Tpetra_Map.hpp"
15#include "Tpetra_ImportExportData.hpp"
16#include "Tpetra_Util.hpp"
17#include "Tpetra_Import.hpp"
18#include "Tpetra_Details_DualViewUtil.hpp"
20#include "Teuchos_as.hpp"
21#include "Teuchos_Array.hpp"
22#include "Teuchos_FancyOStream.hpp"
23#include "Teuchos_ParameterList.hpp"
24#include <memory>
25
26namespace Tpetra {
27
28template <class LocalOrdinal, class GlobalOrdinal, class Node>
30 Export(const Teuchos::RCP<const map_type>& source,
31 const Teuchos::RCP<const map_type>& target,
32 const Teuchos::RCP<Teuchos::FancyOStream>& out,
33 const Teuchos::RCP<Teuchos::ParameterList>& plist)
34 : base_type(source, target, out, plist, "Export") {
35 using std::endl;
36 using Teuchos::rcp;
37 using ::Tpetra::Details::ProfilingRegion;
38 ProfilingRegion regionExport("Tpetra::Export::Export");
39
40 if (this->verbose()) {
41 std::ostringstream os;
42 const int myRank = source->getComm()->getRank();
43 os << myRank << ": Export ctor" << endl;
44 this->verboseOutputStream() << os.str();
45 }
46 Teuchos::Array<GlobalOrdinal> exportGIDs;
47 setupSamePermuteExport(exportGIDs);
48 if (source->isDistributed()) {
49 setupRemote(exportGIDs);
50 }
51
52 TEUCHOS_ASSERT(!this->TransferData_->permuteFromLIDs_.need_sync_device());
53 TEUCHOS_ASSERT(!this->TransferData_->permuteFromLIDs_.need_sync_host());
54 TEUCHOS_ASSERT(!this->TransferData_->permuteToLIDs_.need_sync_device());
55 TEUCHOS_ASSERT(!this->TransferData_->permuteToLIDs_.need_sync_host());
56 TEUCHOS_ASSERT(!this->TransferData_->remoteLIDs_.need_sync_device());
57 TEUCHOS_ASSERT(!this->TransferData_->remoteLIDs_.need_sync_host());
58 TEUCHOS_ASSERT(!this->TransferData_->exportLIDs_.need_sync_device());
59 TEUCHOS_ASSERT(!this->TransferData_->exportLIDs_.need_sync_host());
60
61 this->detectRemoteExportLIDsContiguous();
62
63 if (this->verbose()) {
64 std::ostringstream os;
65 const int myRank = source->getComm()->getRank();
66 os << myRank << ": Export ctor: done" << endl;
67 this->verboseOutputStream() << os.str();
68 }
69}
70
71template <class LocalOrdinal, class GlobalOrdinal, class Node>
73 Export(const Teuchos::RCP<const map_type>& source,
74 const Teuchos::RCP<const map_type>& target)
75 : Export(source, target, Teuchos::null, Teuchos::null) {}
76
77template <class LocalOrdinal, class GlobalOrdinal, class Node>
79 Export(const Teuchos::RCP<const map_type>& source,
80 const Teuchos::RCP<const map_type>& target,
81 const Teuchos::RCP<Teuchos::FancyOStream>& out)
82 : Export(source, target, out, Teuchos::null) {}
83
84template <class LocalOrdinal, class GlobalOrdinal, class Node>
86 Export(const Teuchos::RCP<const map_type>& source,
87 const Teuchos::RCP<const map_type>& target,
88 const Teuchos::RCP<Teuchos::ParameterList>& plist)
89 : Export(source, target, Teuchos::null, plist) {}
90
91template <class LocalOrdinal, class GlobalOrdinal, class Node>
95
96template <class LocalOrdinal, class GlobalOrdinal, class Node>
100
101template <class LocalOrdinal, class GlobalOrdinal, class Node>
103 describe(Teuchos::FancyOStream& out,
104 const Teuchos::EVerbosityLevel verbLevel) const {
105 // Call the base class' method. It does all the work.
106 this->describeImpl(out, "Tpetra::Export", verbLevel);
107}
108
109template <class LocalOrdinal, class GlobalOrdinal, class Node>
111 print(std::ostream& os) const {
112 auto out = Teuchos::getFancyOStream(Teuchos::rcpFromRef(os));
113 // "Print" traditionally meant "everything."
114 this->describe(*out, Teuchos::VERB_EXTREME);
115}
116
117template <class LocalOrdinal, class GlobalOrdinal, class Node>
119 setupSamePermuteExport(Teuchos::Array<GlobalOrdinal>& exportGIDs) {
120 using std::endl;
121 using Teuchos::arcp;
122 using Teuchos::Array;
123 using Teuchos::ArrayRCP;
124 using Teuchos::ArrayView;
125 using Teuchos::as;
126 using Teuchos::null;
127 using ::Tpetra::Details::makeDualViewFromOwningHostView;
128 using ::Tpetra::Details::ProfilingRegion;
129 using ::Tpetra::Details::view_alloc_no_init;
130 using LO = LocalOrdinal;
131 using GO = GlobalOrdinal;
132 using size_type = typename ArrayView<const GO>::size_type;
133 const char tfecfFuncName[] = "setupSamePermuteExport: ";
134 ProfilingRegion regionExport("Tpetra::Export::setupSamePermuteExport");
135
136 std::unique_ptr<std::string> prefix;
137 if (this->verbose()) {
138 auto srcMap = this->getSourceMap();
139 auto comm = srcMap.is_null() ? Teuchos::null : srcMap->getComm();
140 const int myRank = comm.is_null() ? -1 : comm->getRank();
141
142 std::ostringstream os;
143 os << "Proc " << myRank << ": Tpetra::Export::setupSamePermuteExport: ";
144 prefix = std::unique_ptr<std::string>(new std::string(os.str()));
145
146 std::ostringstream os2;
147 os2 << *prefix << "Start" << std::endl;
148 this->verboseOutputStream() << os2.str();
149 }
150
151 const map_type& source = *(this->getSourceMap());
152 const map_type& target = *(this->getTargetMap());
153 ArrayView<const GO> sourceGIDs = source.getLocalElementList();
154 ArrayView<const GO> targetGIDs = target.getLocalElementList();
155
156#ifdef HAVE_TPETRA_DEBUG
157 ArrayView<const GO> rawSrcGids = sourceGIDs;
158 ArrayView<const GO> rawTgtGids = targetGIDs;
159#else
160 const GO* const rawSrcGids = sourceGIDs.getRawPtr();
161 const GO* const rawTgtGids = targetGIDs.getRawPtr();
162#endif // HAVE_TPETRA_DEBUG
163 const size_type numSrcGids = sourceGIDs.size();
164 const size_type numTgtGids = targetGIDs.size();
165 const size_type numGids = std::min(numSrcGids, numTgtGids);
166
167 // Compute numSameIDs_: the number of initial GIDs that are the
168 // same (and occur in the same order) in both Maps. The point of
169 // numSameIDs_ is for the common case of an Export where all the
170 // overlapping GIDs are at the end of the source Map, but
171 // otherwise the source and target Maps are the same. This allows
172 // a fast contiguous copy for the initial "same IDs."
173 size_type numSameGids = 0;
174 for (; numSameGids < numGids &&
175 rawSrcGids[numSameGids] == rawTgtGids[numSameGids];
176 ++numSameGids) {
177 } // third clause of 'for' does everything
178 this->TransferData_->numSameIDs_ = numSameGids;
179
180 if (this->verbose()) {
181 std::ostringstream os;
182 os << *prefix << "numIDs: " << numGids
183 << ", numSameIDs: " << numSameGids << endl;
184 this->verboseOutputStream() << os.str();
185 }
186
187 // Compute permuteToLIDs_, permuteFromLIDs_, exportGIDs, and
188 // exportLIDs_. The first two arrays are IDs to be permuted, and
189 // the latter two arrays are IDs to sent out ("exported"), called
190 // "export" IDs.
191 //
192 // IDs to permute are in both the source and target Maps, which
193 // means we don't have to send or receive them, but we do have to
194 // rearrange (permute) them in general. IDs to send are in the
195 // source Map, but not in the target Map.
196
197 // Iterate over the source Map's LIDs, since we only need to do
198 // GID -> LID lookups for the target Map.
199 const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
200 const LO numSrcLids = static_cast<LO>(numSrcGids);
201 LO numPermutes = 0;
202 LO numExports = 0;
203
204 for (LO srcLid = numSameGids; srcLid < numSrcLids; ++srcLid) {
205 const GO curSrcGid = rawSrcGids[srcLid];
206 // getLocalElement() returns LINVALID if the GID isn't in the
207 // target Map. This saves us a lookup (which
208 // isNodeGlobalElement() would do).
209 const LO tgtLid = target.getLocalElement(curSrcGid);
210 if (tgtLid != LINVALID) { // if target.isNodeGlobalElement (curSrcGid)
211 ++numPermutes;
212 } else {
213 ++numExports;
214 }
215 }
216 if (this->verbose()) {
217 std::ostringstream os;
218 os << *prefix << "numPermutes: " << numPermutes
219 << ", numExports: " << numExports << endl;
220 this->verboseOutputStream() << os.str();
221 }
222 TEUCHOS_ASSERT(numPermutes + numExports ==
223 numSrcLids - numSameGids);
224
225 typename decltype(this->TransferData_->permuteToLIDs_)::t_host
226 permuteToLIDs(view_alloc_no_init("permuteToLIDs"), numPermutes);
227 typename decltype(this->TransferData_->permuteToLIDs_)::t_host
228 permuteFromLIDs(view_alloc_no_init("permuteFromLIDs"), numPermutes);
229 typename decltype(this->TransferData_->permuteToLIDs_)::t_host
230 exportLIDs(view_alloc_no_init("exportLIDs"), numExports);
231
232 // FIXME (mfh 03 Feb 2019) Replace with std::unique_ptr of array,
233 // to avoid superfluous initialization on resize.
234 exportGIDs.resize(numExports);
235
236 {
237 LO numPermutes2 = 0;
238 LO numExports2 = 0;
239 for (LO srcLid = numSameGids; srcLid < numSrcLids; ++srcLid) {
240 const GO curSrcGid = rawSrcGids[srcLid];
241 const LO tgtLid = target.getLocalElement(curSrcGid);
242 if (tgtLid != LINVALID) {
243 permuteToLIDs[numPermutes2] = tgtLid;
244 permuteFromLIDs[numPermutes2] = srcLid;
245 ++numPermutes2;
246 } else {
247 exportGIDs[numExports2] = curSrcGid;
248 exportLIDs[numExports2] = srcLid;
249 ++numExports2;
250 }
251 }
252 TEUCHOS_ASSERT(numPermutes == numPermutes2);
253 TEUCHOS_ASSERT(numExports == numExports2);
254 TEUCHOS_ASSERT(size_t(numExports) == size_t(exportGIDs.size()));
255 }
256
257 // Defer making this->TransferData_->exportLIDs_ until after
258 // getRemoteIndexList, since we might need to shrink it then.
259
260 // exportLIDs is the list of this process' LIDs that it has to
261 // send out. Since this is an Export, and therefore the target
262 // Map is nonoverlapping, we know that each export LID only needs
263 // to be sent to one process. However, the source Map may be
264 // overlapping, so multiple processes might send to the same LID
265 // on a receiving process.
266
267 if (numExports != 0 && !source.isDistributed()) {
268 // This Export has export LIDs, meaning that the source Map has
269 // entries on this process that are not in the target Map on
270 // this process. However, the source Map is not distributed
271 // globally. This implies that this Import is not locally
272 // complete on this process.
273 this->TransferData_->isLocallyComplete_ = false;
274 if (this->verbose()) {
275 std::ostringstream os;
276 os << *prefix << "Export is not locally complete" << endl;
277 this->verboseOutputStream() << os.str();
278 }
279 // mfh 12 Sep 2016: I disagree that this is "abuse"; it may be
280 // correct behavior, depending on the circumstances.
281 TPETRA_ABUSE_WARNING(true, std::runtime_error,
282 "::setupSamePermuteExport(): Source has "
283 "export LIDs but Source is not distributed globally. Exporting to "
284 "a submap of the target map.");
285 }
286
287 // Compute exportPIDs_ ("outgoing" process IDs).
288 //
289 // For each GID in exportGIDs (GIDs to which this process must
290 // send), find its corresponding owning process (a.k.a. "image")
291 // ID in the target Map. Store these process IDs in
292 // exportPIDs_. These are the process IDs to which the Export
293 // needs to send data.
294 //
295 // We only need to do this if the source Map is distributed;
296 // otherwise, the Export doesn't have to perform any
297 // communication.
298 if (source.isDistributed()) {
299 if (this->verbose()) {
300 std::ostringstream os;
301 os << *prefix << "Source Map is distributed; "
302 "call targetMap.getRemoteiNdexList"
303 << endl;
304 this->verboseOutputStream() << os.str();
305 }
306 this->TransferData_->exportPIDs_.resize(exportGIDs.size());
307 // This call will assign any GID in the target Map with no
308 // corresponding process ID a fake process ID of -1. We'll use
309 // this below to remove exports for processses that don't exist.
310 const LookupStatus lookup =
311 target.getRemoteIndexList(exportGIDs(),
312 this->TransferData_->exportPIDs_());
313 // mfh 12 Sep 2016: I disagree that this is "abuse"; it may be
314 // correct behavior, depending on the circumstances.
315 TPETRA_ABUSE_WARNING(lookup == IDNotPresent, std::runtime_error,
316 "::setupSamePermuteExport(): The source Map has GIDs not found "
317 "in the target Map.");
318
319 // Get rid of process IDs not in the target Map. This prevents
320 // exporting to GIDs which don't belong to any process in the
321 // target Map.
322 if (lookup == IDNotPresent) {
323 // There is at least one GID owned by the calling process in
324 // the source Map, which is not owned by any process in the
325 // target Map.
326 this->TransferData_->isLocallyComplete_ = false;
327
328 Teuchos::Array<int>& exportPIDs = this->TransferData_->exportPIDs_;
329
330 const size_type totalNumExports = exportPIDs.size();
331 const size_type numInvalidExports =
332 std::count_if(exportPIDs.begin(), exportPIDs.end(),
333 [](const int procId) { return procId == -1; });
334 if (this->verbose()) {
335 std::ostringstream os;
336 os << *prefix << "totalNumExports: " << totalNumExports
337 << ", numInvalidExports: " << numInvalidExports << endl;
338 this->verboseOutputStream() << os.str();
339 }
340 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numInvalidExports == 0, std::logic_error,
341 "targetMap.getRemoteIndexList returned IDNotPresent, but no export "
342 "PIDs are -1. Please report this bug to the Tpetra developers.");
343
344 // We know that at least one export ID is invalid, that is,
345 // not in any process on the target Map. If all export IDs
346 // are invalid, we can delete all exports. Otherwise, keep
347 // the valid exports and discard the rest. This is legit
348 // Petra Object Model behavior, but it's a less common case.
349
350 if (numInvalidExports == totalNumExports) {
351 exportGIDs.resize(0);
352 exportLIDs = decltype(exportLIDs)();
353 exportPIDs.resize(0);
354 } else {
355 size_type numValidExports = 0;
356 for (size_type e = 0; e < totalNumExports; ++e) {
357 if (this->TransferData_->exportPIDs_[e] != -1) {
358 exportGIDs[numValidExports] = exportGIDs[e];
359 exportLIDs[numValidExports] = exportLIDs[e];
360 exportPIDs[numValidExports] = exportPIDs[e];
361 ++numValidExports;
362 }
363 }
364 exportGIDs.resize(numValidExports);
365 Kokkos::resize(exportLIDs, numValidExports);
366 exportPIDs.resize(numValidExports);
367 }
368 }
369 }
370
371 // FIXME (mfh 03 Feb 2019) These three DualViews could share a
372 // single device allocation, in order to avoid high cudaMalloc
373 // cost and device memory fragmentation.
374 makeDualViewFromOwningHostView(this->TransferData_->permuteToLIDs_, permuteToLIDs);
375 makeDualViewFromOwningHostView(this->TransferData_->permuteFromLIDs_, permuteFromLIDs);
376 makeDualViewFromOwningHostView(this->TransferData_->exportLIDs_, exportLIDs);
377
378 if (this->verbose()) {
379 std::ostringstream os;
380 os << *prefix << "Done!" << std::endl;
381 this->verboseOutputStream() << os.str();
382 }
383}
384
385template <class LocalOrdinal, class GlobalOrdinal, class Node>
386void Export<LocalOrdinal, GlobalOrdinal, Node>::
387 setupRemote(Teuchos::Array<GlobalOrdinal>& exportGIDs) {
388 using std::endl;
389 using Teuchos::Array;
390 using ::Tpetra::Details::makeDualViewFromOwningHostView;
391 using ::Tpetra::Details::view_alloc_no_init;
392 using LO = LocalOrdinal;
393 using GO = GlobalOrdinal;
394
395 std::unique_ptr<std::string> prefix;
396 if (this->verbose()) {
397 auto srcMap = this->getSourceMap();
398 auto comm = srcMap.is_null() ? Teuchos::null : srcMap->getComm();
399 const int myRank = comm.is_null() ? -1 : comm->getRank();
400
401 std::ostringstream os;
402 os << "Proc " << myRank << ": Tpetra::Export::setupRemote: ";
403 prefix = std::unique_ptr<std::string>(new std::string(os.str()));
404
405 std::ostringstream os2;
406 os2 << *prefix << "Start" << std::endl;
407 this->verboseOutputStream() << os2.str();
408 }
409
410 TEUCHOS_ASSERT(!this->getTargetMap().is_null());
411 const map_type& tgtMap = *(this->getTargetMap());
412
413 // Sort exportPIDs_ in ascending order, and apply the same
414 // permutation to exportGIDs_ and exportLIDs_. This ensures that
415 // exportPIDs_[i], exportGIDs_[i], and exportLIDs_[i] all
416 // refer to the same thing.
417 {
418 TEUCHOS_ASSERT(size_t(this->TransferData_->exportLIDs_.extent(0)) ==
419 size_t(this->TransferData_->exportPIDs_.size()));
420 this->TransferData_->exportLIDs_.modify_host();
421 auto exportLIDs = this->TransferData_->exportLIDs_.view_host();
422 sort3(this->TransferData_->exportPIDs_.begin(),
423 this->TransferData_->exportPIDs_.end(),
424 exportGIDs.getRawPtr(),
425 exportLIDs.data());
426 this->TransferData_->exportLIDs_.sync_device();
427 // FIXME (mfh 03 Feb 2019) We actually end up sync'ing
428 // exportLIDs_ to device twice, once in setupSamePermuteExport,
429 // and once here. We could avoid the first sync.
430 }
431
432 if (this->verbose()) {
433 std::ostringstream os;
434 os << *prefix << "Call createFromSends" << endl;
435 this->verboseOutputStream() << os.str();
436 }
437
438 // Construct the list of entries that calling image needs to send
439 // as a result of everyone asking for what it needs to receive.
440 //
441 // mfh 05 Jan 2012: I understand the above comment as follows:
442 // Construct the communication plan from the list of image IDs to
443 // which we need to send.
444 Teuchos::Array<int>& exportPIDs = this->TransferData_->exportPIDs_;
445 Distributor& distributor = this->TransferData_->distributor_;
446 const size_t numRemoteIDs = distributor.createFromSends(exportPIDs());
447
448 if (this->verbose()) {
449 std::ostringstream os;
450 os << *prefix << "numRemoteIDs: " << numRemoteIDs
451 << "; call doPostsAndWaits" << endl;
452 this->verboseOutputStream() << os.str();
453 }
454
455 // Use the communication plan with ExportGIDs to find out who is
456 // sending to us and get the proper ordering of GIDs for incoming
457 // remote entries (these will be converted to LIDs when done).
458
459 Kokkos::View<const GO*, Kokkos::HostSpace> exportGIDsConst(exportGIDs.data(), exportGIDs.size());
460 Kokkos::View<GO*, Kokkos::HostSpace> remoteGIDs("remoteGIDs", numRemoteIDs);
461 distributor.doPostsAndWaits(exportGIDsConst, 1, remoteGIDs);
462
463 // Remote (incoming) IDs come in as GIDs; convert to LIDs. LIDs
464 // tell this process where to store the incoming remote data.
465 using host_remote_lids_type =
466 typename decltype(this->TransferData_->remoteLIDs_)::t_host;
467 host_remote_lids_type remoteLIDs(view_alloc_no_init("remoteLIDs"), numRemoteIDs);
468
469 for (LO j = 0; j < LO(numRemoteIDs); ++j) {
470 remoteLIDs[j] = tgtMap.getLocalElement(remoteGIDs[j]);
471 }
472 makeDualViewFromOwningHostView(this->TransferData_->remoteLIDs_, remoteLIDs);
473
474 if (this->verbose()) {
475 std::ostringstream os;
476 os << *prefix << "Done!" << endl;
477 this->verboseOutputStream() << os.str();
478 }
479}
480
481} // namespace Tpetra
482
// Macro for explicit instantiation of Tpetra::Export.
//
// The expansion names Export without qualification, so invoke this
// macro only from inside the Tpetra namespace.  Most users never need
// to invoke it themselves.
//
// LO:   the local ordinal type.
// GO:   the global ordinal type.
// NODE: the Kokkos Node type.
#define TPETRA_EXPORT_INSTANT(LO, GO, NODE) template class Export<LO, GO, NODE>;
492
493#endif // TPETRA_EXPORT_DEF_HPP
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Stand-alone utility functions and macros.
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WARNINGS.
Struct that holds views of the contents of a CrsMatrix.
bool verbose() const
Whether to print verbose debugging output.
Teuchos::RCP< ImportExportData< LocalOrdinal, GlobalOrdinal, Node > > TransferData_
All the data needed for executing the Export communication plan.
Teuchos::FancyOStream & verboseOutputStream() const
Valid (nonnull) output stream for verbose output.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distribution.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
virtual void print(std::ostream &os) const
Print the Export's data to the given output stream.
Export(const Teuchos::RCP< const map_type > &source, const Teuchos::RCP< const map_type > &target)
Construct an Export object from the source and target Map.
auto view_alloc_no_init(const std::string &label) -> decltype(Kokkos::view_alloc(label, Kokkos::WithoutInitializing))
Use in place of the string label as the first argument of Kokkos::View's constructor,...
void makeDualViewFromOwningHostView(Kokkos::DualView< ElementType *, DeviceType > &dv, const typename Kokkos::DualView< ElementType *, DeviceType >::t_host &hostView)
Initialize dv such that its host View is hostView.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
void sort3(const IT1 &first1, const IT1 &last1, const IT2 &first2, const IT3 &first3, const bool stableSort=false)
Sort the first array, and apply the same permutation to the second and third arrays.