Tpetra_ComputeGatherMap.hpp
// @HEADER
// *****************************************************************************
// Tpetra: Templated Linear Algebra Services Package
//
// Copyright 2008 NTESS and the Tpetra contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#ifndef __Tpetra_ComputeGatherMap_hpp
#define __Tpetra_ComputeGatherMap_hpp

#include "Tpetra_Map.hpp"
#include <numeric>

// Macro that marks a function as "possibly unused," in order to
// suppress build warnings.
#if !defined(TRILINOS_UNUSED_FUNCTION)
#if defined(__GNUC__) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
#define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#elif defined(__clang__)
#if __has_attribute(unused)
#define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#else
#define TRILINOS_UNUSED_FUNCTION
#endif // Clang has 'unused' attribute
#elif defined(__IBMCPP__)
// IBM's C++ compiler for Blue Gene/Q (V12.1) implements 'used' but not 'unused'.
//
// http://pic.dhe.ibm.com/infocenter/compbg/v121v141/index.jsp
#define TRILINOS_UNUSED_FUNCTION
#else // some other compiler
#define TRILINOS_UNUSED_FUNCTION
#endif
#endif // ! defined(TRILINOS_UNUSED_FUNCTION)

namespace Tpetra {
namespace Details {

namespace {
#ifdef HAVE_MPI
// We're communicating integers of type IntType.  Figure out the right
// MPI_Datatype for IntType.  Usually it is int or long, so these are
// good enough for now.
template <class IntType>
TRILINOS_UNUSED_FUNCTION MPI_Datatype
getMpiDatatype() {
  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
                             "Not implemented for IntType != int, long, or long long");
}

template <>
TRILINOS_UNUSED_FUNCTION MPI_Datatype
getMpiDatatype<int>() { return MPI_INT; }

template <>
TRILINOS_UNUSED_FUNCTION MPI_Datatype
getMpiDatatype<long>() { return MPI_LONG; }

template <>
TRILINOS_UNUSED_FUNCTION MPI_Datatype
getMpiDatatype<long long>() { return MPI_LONG_LONG; }
#endif // HAVE_MPI

template <class IntType>
void gather(const IntType sendbuf[], const int sendcnt,
            IntType recvbuf[], const int recvcnt,
            const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm) {
  using Teuchos::RCP;
  using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
  using Teuchos::MpiComm;

  // Get the raw MPI communicator, so we don't have to wrap MPI_Gather in Teuchos.
  RCP<const MpiComm<int> > mpiComm = rcp_dynamic_cast<const MpiComm<int> >(comm);
  if (!mpiComm.is_null()) {
    MPI_Datatype sendtype = getMpiDatatype<IntType>();
    MPI_Datatype recvtype = sendtype;
    MPI_Comm rawMpiComm = *(mpiComm->getRawMpiComm());
    const int err = MPI_Gather(reinterpret_cast<void*>(const_cast<IntType*>(sendbuf)),
                               sendcnt,
                               sendtype,
                               reinterpret_cast<void*>(recvbuf),
                               recvcnt,
                               recvtype,
                               root,
                               rawMpiComm);
    TEUCHOS_TEST_FOR_EXCEPTION(
        err != MPI_SUCCESS, std::runtime_error, "MPI_Gather failed");
    return;
  }
#endif // HAVE_MPI

  TEUCHOS_TEST_FOR_EXCEPTION(
      recvcnt > sendcnt, std::invalid_argument,
      "gather: If the input communicator contains only one process, "
      "then you cannot receive more entries than you send.  "
      "You want to receive " << recvcnt << " entries, "
      "but only send " << sendcnt << ".");
  // Serial communicator case: just copy.  recvcnt is the amount to
  // receive, so it's the amount to copy.
  std::copy(sendbuf, sendbuf + recvcnt, recvbuf);
}

template <class IntType>
void gatherv(const IntType sendbuf[], const int sendcnt,
             IntType recvbuf[], const int recvcnts[], const int displs[],
             const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm) {
  using Teuchos::RCP;
  using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
  using Teuchos::MpiComm;

  // Get the raw MPI communicator, so we don't have to wrap MPI_Gatherv in Teuchos.
  RCP<const MpiComm<int> > mpiComm =
      rcp_dynamic_cast<const MpiComm<int> >(comm);
  if (!mpiComm.is_null()) {
    MPI_Datatype sendtype = getMpiDatatype<IntType>();
    MPI_Datatype recvtype = sendtype;
    MPI_Comm rawMpiComm = *(mpiComm->getRawMpiComm());
    const int err = MPI_Gatherv(reinterpret_cast<void*>(const_cast<IntType*>(sendbuf)),
                                sendcnt,
                                sendtype,
                                reinterpret_cast<void*>(recvbuf),
                                const_cast<int*>(recvcnts),
                                const_cast<int*>(displs),
                                recvtype,
                                root,
                                rawMpiComm);
    TEUCHOS_TEST_FOR_EXCEPTION(
        err != MPI_SUCCESS, std::runtime_error, "MPI_Gatherv failed");
    return;
  }
#endif // HAVE_MPI
  TEUCHOS_TEST_FOR_EXCEPTION(
      recvcnts[0] > sendcnt, std::invalid_argument,
      "gatherv: If the input communicator contains only one process, "
      "then you cannot receive more entries than you send.  "
      "You want to receive " << recvcnts[0] << " entries, "
      "but only send " << sendcnt << ".");
  // Serial communicator case: just copy.  recvcnts[0] is the amount to
  // receive, so it's the amount to copy.  Start writing to recvbuf at
  // the offset displs[0].
  std::copy(sendbuf, sendbuf + recvcnts[0], recvbuf + displs[0]);
}
} // namespace

// Given an arbitrary Map, compute a Map containing all the GIDs
// in the same order as in (the one-to-one version of) map, but
// all owned exclusively by Proc 0.
template <class MapType>
Teuchos::RCP<const MapType>
computeGatherMap(Teuchos::RCP<const MapType> map,
                 const Teuchos::RCP<Teuchos::FancyOStream>& err,
                 const bool dbg = false) {
  using std::endl;
  using Teuchos::Array;
  using Teuchos::ArrayRCP;
  using Teuchos::ArrayView;
  using Teuchos::as;
  using Teuchos::Comm;
  using Teuchos::RCP;
  using Teuchos::rcp;  // needed for the unqualified rcp() call below
  typedef typename MapType::local_ordinal_type LO;
  typedef typename MapType::global_ordinal_type GO;
  typedef typename MapType::node_type NT;

  RCP<const Comm<int> > comm = map->getComm();
  const int numProcs = comm->getSize();
  const int myRank = comm->getRank();

  if (!err.is_null()) {
    err->pushTab();
  }
  if (dbg) {
    *err << myRank << ": computeGatherMap:" << endl;
  }
  if (!err.is_null()) {
    err->pushTab();
  }

  RCP<const MapType> oneToOneMap;
  if (map->isContiguous()) {
    oneToOneMap = map; // contiguous Maps are always 1-to-1
  } else {
    if (dbg) {
      *err << myRank << ": computeGatherMap: Calling createOneToOne" << endl;
    }
    // It could be that Map is one-to-one, but the class doesn't give us
    // a way to test this, other than to create the one-to-one Map.
    oneToOneMap = createOneToOne<LO, GO, NT>(map);
  }

  RCP<const MapType> gatherMap;
  if (numProcs == 1) {
    gatherMap = oneToOneMap;
  } else {
    if (dbg) {
      *err << myRank << ": computeGatherMap: Gathering Map counts" << endl;
    }
    // Gather each process' count of Map elements to Proc 0, into the
    // recvCounts array.  This will tell Proc 0 how many GIDs to expect
    // from each process when calling MPI_Gatherv.  Counts and offsets
    // are all int, because that's what MPI uses.  Teuchos::as will at
    // least prevent bad casts to int in a debug build.
    //
    // Yeah, it's not memory scalable.  Guess what: We're going to bring
    // ALL the data to Proc 0, not just the receive counts.  The first
    // MPI_Gather is only the beginning...  Seriously, if you want to
    // make this memory scalable, the right thing to do (after the
    // Export to the one-to-one Map) is to go round robin through the
    // processes, having each send a chunk of data (with its GIDs, to
    // get the order of rows right) at a time, and overlapping writing
    // to the file (resp. reading from it) with receiving (resp.
    // sending) the next chunk.
    const int myEltCount = as<int>(oneToOneMap->getLocalNumElements());
    Array<int> recvCounts(numProcs);
    const int rootProc = 0;
    gather<int>(&myEltCount, 1, recvCounts.getRawPtr(), 1, rootProc, comm);

    ArrayView<const GO> myGlobalElts = oneToOneMap->getLocalElementList();
    const int numMyGlobalElts = as<int>(myGlobalElts.size());
    // Only Proc 0 needs to receive and store all the GIDs (from all
    // processes).
    ArrayRCP<GO> allGlobalElts;
    if (myRank == 0) {
      allGlobalElts = Teuchos::arcp<GO>(oneToOneMap->getGlobalNumElements());
      std::fill(allGlobalElts.begin(), allGlobalElts.end(), 0);
    }

    if (dbg) {
      *err << myRank << ": computeGatherMap: Computing MPI_Gatherv "
           "displacements" << endl;
    }
    // Displacements for gatherv() in this case (where we are gathering
    // into a contiguous array) are an exclusive partial sum (first
    // entry is zero, second starts the partial sum) of recvCounts.
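    // For example, recvCounts = {3, 2, 4} yields displs = {0, 3, 5}.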
    Array<int> displs(numProcs, 0);
    std::partial_sum(recvCounts.begin(), recvCounts.end() - 1,
                     displs.begin() + 1);
    if (dbg) {
      *err << myRank << ": computeGatherMap: Calling MPI_Gatherv" << endl;
    }
    gatherv<GO>(myGlobalElts.getRawPtr(), numMyGlobalElts,
                allGlobalElts.getRawPtr(), recvCounts.getRawPtr(),
                displs.getRawPtr(), rootProc, comm);

    if (dbg) {
      *err << myRank << ": computeGatherMap: Creating gather Map" << endl;
    }
    // Create a Map with all the GIDs, in the same order as in the
    // one-to-one Map, but owned by Proc 0.
    ArrayView<const GO> allElts(NULL, 0);
    if (myRank == 0) {
      allElts = allGlobalElts();
    }
    const global_size_t INVALID = Teuchos::OrdinalTraits<global_size_t>::invalid();
    gatherMap = rcp(new MapType(INVALID, allElts,
                                oneToOneMap->getIndexBase(),
                                comm));
  }
  if (!err.is_null()) {
    err->popTab();
  }
  if (dbg) {
    *err << myRank << ": computeGatherMap: done" << endl;
  }
  if (!err.is_null()) {
    err->popTab();
  }
  return gatherMap;
}

} // namespace Details
} // namespace Tpetra

#endif // __Tpetra_ComputeGatherMap_hpp
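
Below is a minimal usage sketch, separate from the header above. It assumes a Tpetra build in which this header is reachable on the include path as "Tpetra_ComputeGatherMap.hpp"; the Map size (100 global entries), index base 0, and all variable names are illustration choices, not anything prescribed by the header. Note that computeGatherMap lives in namespace Tpetra::Details, so it is an implementation detail (used internally, e.g. by Tpetra's file I/O) rather than a public API.

#include "Tpetra_Core.hpp"
#include "Tpetra_Map.hpp"
#include "Tpetra_ComputeGatherMap.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
  // Initialize Kokkos and (if enabled) MPI; finalized automatically
  // when the ScopeGuard goes out of scope.
  Tpetra::ScopeGuard tpetraScope(&argc, &argv);
  {
    using map_type = Tpetra::Map<>;
    auto comm = Tpetra::getDefaultComm();

    // A contiguous, uniformly distributed Map with 100 global indices.
    const Tpetra::global_size_t numGlobalEntries = 100;
    auto map = Teuchos::rcp(new map_type(numGlobalEntries, 0, comm));

    // Gather Map: same global indices, same order, all owned by Proc 0.
    // Pass Teuchos::null for the debug output stream.
    auto gatherMap =
        Tpetra::Details::computeGatherMap<map_type>(map, Teuchos::null);

    // Proc 0 now owns every entry; all other processes own none.
    std::cout << "Proc " << comm->getRank() << " owns "
              << gatherMap->getLocalNumElements() << " of "
              << gatherMap->getGlobalNumElements() << " entries" << std::endl;
  }
  return 0;
}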