Zoltan2
Loading...
Searching...
No Matches
Zoltan2_MachineTorusRCA.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Zoltan2: A package of combinatorial algorithms for scientific computing
4//
5// Copyright 2012 NTESS and the Zoltan2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef _ZOLTAN2_MACHINE_TORUS_RCALIB_HPP_
11#define _ZOLTAN2_MACHINE_TORUS_RCALIB_HPP_
12
13#include <Teuchos_Comm.hpp>
14#include <Teuchos_CommHelpers.hpp>
15#include <Zoltan2_Machine.hpp>
16
17#ifdef HAVE_ZOLTAN2_RCALIB
18extern "C"{
19#include <rca_lib.h>
20}
21#endif
22
23
24namespace Zoltan2{
25
29template <typename pcoord_t, typename part_t>
30class MachineTorusRCA : public Machine <pcoord_t, part_t> {
31
32public:
37 MachineTorusRCA(const Teuchos::Comm<int> &comm):
38 Machine<pcoord_t,part_t>(comm),
39 networkDim(3),
40 actual_networkDim(3),
41 procCoords(NULL),
42 actual_procCoords(NULL),
43 machine_extent(NULL),
44 actual_machine_extent(NULL),
45 is_transformed(false),
46 pl(NULL) {
47
48 actual_machine_extent = machine_extent = new int[networkDim];
49 this->getRealMachineExtent(this->machine_extent);
50 actual_machine_extent = machine_extent;
51
52 //allocate memory for processor coordinates.
53 actual_procCoords = procCoords = new pcoord_t *[networkDim];
54 for (int i = 0; i < networkDim; ++i) {
55 procCoords[i] = new pcoord_t[this->numRanks];
56 memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
57 }
58
59 //obtain the coordinate of the processor.
60 pcoord_t *xyz = new pcoord_t[networkDim];
62 for (int i = 0; i < networkDim; i++)
63 procCoords[i][this->myRank] = xyz[i];
64 delete [] xyz;
65
66 //reduceAll the coordinates of each processor.
67 gatherMachineCoordinates(comm);
68 }
69
70 virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
71 int dim = 0;
72 int transformed_network_dim = networkDim;
73 if (dim < transformed_network_dim)
74 wrap_around[dim++] = true;
75 if (dim < transformed_network_dim)
76 wrap_around[dim++] = true;
77 if (dim < transformed_network_dim)
78 wrap_around[dim++] = true;
79 return true;
80 }
81
82 MachineTorusRCA(const Teuchos::Comm<int> &comm,
83 const Teuchos::ParameterList &pl_):
84 Machine<pcoord_t,part_t>(comm),
85 networkDim(3),
86 actual_networkDim(3),
87 procCoords(NULL),
88 actual_procCoords(NULL),
89 machine_extent(NULL),
90 actual_machine_extent(NULL),
91 is_transformed(false),
92 pl(&pl_) {
93
94 actual_machine_extent = machine_extent = new int[networkDim];
95 this->getRealMachineExtent(this->machine_extent);
96 actual_machine_extent = machine_extent;
97
98 //allocate memory for processor coordinates.
99 actual_procCoords = procCoords = new pcoord_t *[networkDim];
100 for (int i = 0; i < networkDim; ++i) {
101 procCoords[i] = new pcoord_t[this->numRanks];
102 memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
103 }
104 //obtain the coordinate of the processor.
105 pcoord_t *xyz = new pcoord_t[networkDim];
107 for (int i = 0; i < networkDim; i++)
108 procCoords[i][this->myRank] = xyz[i];
109 delete [] xyz;
110
111
112 //reduceAll the coordinates of each processor.
113 gatherMachineCoordinates(comm);
114
115 const Teuchos::ParameterEntry *pe2 =
116 this->pl->getEntryPtr("Machine_Optimization_Level");
117// this->printAllocation();
118
119 if (pe2) {
120 int optimization_level;
121 optimization_level = pe2->getValue<int>(&optimization_level);
122
123 if (optimization_level == 1) {
124 is_transformed = true;
125 this->networkDim = 3;
126 procCoords = new pcoord_t * [networkDim];
127 for(int i = 0; i < networkDim; ++i) {
128 procCoords[i] = new pcoord_t[this->numRanks] ;
129 //this->proc_coords[permutation[i]];
130 }
131 for (int i = 0; i < this->numRanks; ++i) {
132 procCoords[0][i] = this->actual_procCoords[0][i] * 8;
133 int yordinal = this->actual_procCoords[1][i];
134 procCoords[1][i] = yordinal/2 * (16 + 8) + (yordinal %2) * 8;
135 int zordinal = this->actual_procCoords[2][i];
136 procCoords[2][i] = zordinal * 5 + (zordinal / 8) * 3;
137 }
138 int mx = this->machine_extent[0];
139 int my = this->machine_extent[1];
140 int mz = this->machine_extent[2];
141
142
143 this->machine_extent = new int[networkDim];
144 this->machine_extent[0] = mx * 8;
145 this->machine_extent[1] = my/2 * (16 + 8) + (my %2) * 8;
146 this->machine_extent[2] = mz * 5 + (mz / 8) * 3;
147 if(this->myRank == 0)
148 std::cout << "Transforming the coordinates" << std::endl;
149// this->printAllocation();
150 }
151 else if(optimization_level >= 3) {
152 is_transformed = true;
153 this->networkDim = 6;
154 procCoords = new pcoord_t * [networkDim];
155 for(int i = 0; i < networkDim; ++i) {
156 procCoords[i] = new pcoord_t[this->numRanks] ;
157// this->proc_coords[permutation[i]];
158 }
159
160// this->machine_extent[0] = this->actual_machine_extent
161 this->machine_extent = new int[networkDim];
162
163 this->machine_extent[0] =
164 ceil (int (this->actual_machine_extent[0]) / 2.0) * 64 ;
165 this->machine_extent[3] = 2 * 8 ;
166 this->machine_extent[1] =
167 ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
168 this->machine_extent[4] = 2 * 8;
169 this->machine_extent[2] =
170 ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
171 this->machine_extent[5] = 8 * 5;
172
173 for (int k = 0; k < this->numRanks ; k++) {
174 // This part is for titan.
175 // But it holds for other 3D torus machines such as Bluewaters.
176
177 // Bandwitdh along
178 // X = 75
179 // Y = 37.5 or 75 --- everyother has 37.5
180 // --- Y[0-1] =75 but Y[1-2]=37.5
181 // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
182
183 // Along X we make groups of 2. Then scale the distance with 64.
184 // First dimension is represents x/2
185 procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 2) * 64;
186 // Then the 3rd dimension is x%2. distance is scaled with 8,
187 // reversely proportional with bw=75
188 procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 2) * 8 ;
189
190 // Along Y. Every other one has the slowest link. So we want
191 // distances between Y/2 huge.
192 // We scale Y/2 with 2400 so that we make sure that it is the
193 // first one we divie.
194 procCoords[1][k] =
195 (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
196 // The other one is scaled with 8 as in X.
197 procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
198
199 // We make groups of 8 along Z. Then distances between these
200 // groups are scaled with 160.
201 // So that it is more than 2x distance than the distance with
202 // X grouping.
203 // That is we scale the groups of Zs with 160. Groups of X with 64.
204 // Zs has 8 processors connecting them, while X has only one. We
205 // want to divide along
206 // Z twice before dividing along X.
207 procCoords[2][k] =
208 ((int (this->actual_procCoords[2][k])) / 8) * 160;
209 // In the second group everything is scaled with 5, as bw=120
210 procCoords[5][k] =
211 ((int (this->actual_procCoords[2][k])) % 8) * 5;
212 }
213 }
214 else if(optimization_level == 2) {
215 // This is as above case. but we make groups of 3 along X instead.
216 is_transformed = true;
217 this->networkDim = 6;
218 procCoords = new pcoord_t * [networkDim];
219 for(int i = 0; i < networkDim; ++i) {
220 procCoords[i] = new pcoord_t[this->numRanks] ;
221// this->proc_coords[permutation[i]];
222 }
223
224// this->machine_extent[0] = this->actual_machine_extent
225 this->machine_extent = new int[networkDim];
226
227 this->machine_extent[0] =
228 ceil(int (this->actual_machine_extent[0]) / 3.0) * 128 ;
229 this->machine_extent[3] = 3 * 8 ;
230 this->machine_extent[1] =
231 ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
232 this->machine_extent[4] = 2 * 8;
233 this->machine_extent[2] =
234 ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
235 this->machine_extent[5] = 8 * 5;
236
237
238 for (int k = 0; k < this->numRanks ; k++) {
239 // This part is for titan.
240 // But it holds for other 3D torus machines such as Bluewaters.
241
242 // Bandwitdh along
243 // X = 75
244 // Y = 37.5 or 75 --- everyother has 37.5
245 // --- Y[0-1] =75 but Y[1-2]=37.5
246 // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
247
248 // In this case we make groups of 3. along X.
249 procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 3) * 128;
250 // Then the 3rd dimension is x%2. distance is scaled with 8,
251 // reversely proportional with bw=75
252 procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 3) * 8 ;
253
254 // Along Y. Every other one has the slowest link. So we want
255 // distances between Y/2 huge.
256 // We scale Y/2 with 2400 so that we make sure that it is the
257 // first one we divie.
258 procCoords[1][k] =
259 (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
260 // The other one is scaled with 8 as in X.
261 procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
262
263
264 procCoords[2][k] =
265 ((int (this->actual_procCoords[2][k])) / 8) * 160;
266 // In the second group everything is scaled with 5, as bw=120
267 procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
268 }
269 }
270 }
271 }
272
273
274
275
277 if (is_transformed) {
278 is_transformed = false;
279 for (int i = 0; i < actual_networkDim; i++) {
280 delete [] actual_procCoords[i];
281 }
282 delete [] actual_procCoords;
283 delete [] actual_machine_extent;
284 }
285 for (int i = 0; i < networkDim; i++) {
286 delete [] procCoords[i];
287 }
288 delete [] procCoords;
289 delete [] machine_extent;
290 }
291
292 bool hasMachineCoordinates() const { return true; }
293
294 int getMachineDim() const { return this->networkDim; }
295 int getRealMachineDim() const { return this->actual_networkDim; }
296
297 bool getMachineExtent(int *nxyz) const {
298 if (is_transformed) {
299 return false;
300 }
301 else {
302 int dim = 0;
303 nxyz[dim++] = this->machine_extent[0]; // X
304 nxyz[dim++] = this->machine_extent[1]; // Y
305 nxyz[dim++] = this->machine_extent[2]; // Z
306 return true;
307 }
308 }
309
310 bool getRealMachineExtent(int *nxyz) const {
311#if defined (HAVE_ZOLTAN2_RCALIB)
312 mesh_coord_t mxyz;
313 rca_get_max_dimension(&mxyz);
314 int dim = 0;
315 nxyz[dim++] = mxyz.mesh_x + 1; // X
316 nxyz[dim++] = mxyz.mesh_y + 1; // Y
317 nxyz[dim++] = mxyz.mesh_z + 1; // Z
318 return true;
319#else
320 return false;
321#endif
322 }
323
324
326 if(this->myRank == 0) {
327 for (int i = 0; i < this->numRanks; ++i) {
328 std::cout << "Rank:" << i
329 << " " << procCoords[0][i]
330 << " " << procCoords[1][i]
331 << " " << procCoords[2][i] << std::endl;
332 }
333 std::cout << "Machine Extent:"
334 << " " << this->machine_extent[0]
335 << " " << this->machine_extent[1]
336 << " " << this->machine_extent[2] << std::endl;
337 }
338 }
339
340 bool getMyMachineCoordinate(pcoord_t *xyz) {
341 for (int i = 0; i < this->networkDim; ++i) {
342 xyz[i] = procCoords[i][this->myRank];
343 }
344 return true;
345 }
346
347 bool getMyActualMachineCoordinate(pcoord_t *xyz) {
348#if defined (HAVE_ZOLTAN2_RCALIB)
349 rs_node_t nodeInfo; /* Cray node info for node running this function */
350 rca_get_nodeid(&nodeInfo);
351 int NIDs = (int)nodeInfo.rs_node_s._node_id; /* its node ID */
352
353 mesh_coord_t node_coord;
354 int returnval = rca_get_meshcoord((uint16_t)NIDs, &node_coord);
355 if (returnval == -1) {
356 return false;
357 }
358 xyz[0] = node_coord.mesh_x;
359 xyz[1] = node_coord.mesh_y;
360 xyz[2] = node_coord.mesh_z;
361 return true;
362#else
363 return false;
364#endif
365 }
366
367 inline bool getMachineCoordinate(const int rank,
368 pcoord_t *xyz) const {
369 for (int i = 0; i < this->networkDim; ++i) {
370 xyz[i] = procCoords[i][rank];
371 }
372 return true;
373 }
374
375
376 bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
377 return false; // cannot yet return from nodename
378 }
379
380 bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
381 allCoords = procCoords;
382 return true;
383 }
384
385 virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override {
386 hops = 0;
387 for (int i = 0; i < networkDim; ++i) {
388 pcoord_t distance = procCoords[i][rank1] - procCoords[i][rank2];
389 if (distance < 0)
390 distance = -distance;
391 if (machine_extent[i] - distance < distance)
392 distance = machine_extent[i] - distance;
393 hops += distance;
394 }
395 return true;
396 }
397
398
399private:
400
// Number of dimensions of procCoords / machine_extent; 3 initially, set
// to 3 or 6 by the parameter-list constructor when it transforms.
401 int networkDim;
// Number of dimensions of the untransformed coordinates; always 3.
402 int actual_networkDim;
403
// Coordinates currently exposed, indexed [dimension][rank].
404 pcoord_t **procCoords;
// Untransformed coordinates; aliases procCoords unless is_transformed.
405 pcoord_t **actual_procCoords;
406
// Extent per dimension matching procCoords.
407 part_t *machine_extent;
// Untransformed extent; aliases machine_extent unless is_transformed.
408 part_t *actual_machine_extent;
// True when procCoords/machine_extent are separate, transformed arrays;
// the destructor then frees both sets.
409 bool is_transformed;
410
411
// Non-owning pointer to the constructor's parameter list (NULL when the
// single-argument constructor was used).
412 const Teuchos::ParameterList *pl;
413
414/*
415 bool delete_transformed_coords;
416 int transformed_network_dim;
417 pcoord_t **transformed_coordinates;
418*/
419
420 void gatherMachineCoordinates(const Teuchos::Comm<int> &comm) {
421 // reduces and stores all machine coordinates.
422 pcoord_t *tmpVect = new pcoord_t [this->numRanks];
423
424 for (int i = 0; i < networkDim; i++) {
425 Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
426 this->numRanks,
427 procCoords[i], tmpVect);
428 pcoord_t *tmp = tmpVect;
429 tmpVect = procCoords[i];
430 procCoords[i] = tmp;
431 }
432 delete [] tmpVect;
433 }
434
435};
436
437} // namespace Zoltan2
438#endif
An RCA Machine class on Torus Networks.
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
bool getRealMachineExtent(int *nxyz) const
MachineTorusRCA(const Teuchos::Comm< int > &comm)
Constructor: An RCA torus network machine description.
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount function: sets hops to the hop count between rank1 and rank2; returns true if coordinates are available.
MachineTorusRCA(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
bool getMyMachineCoordinate(pcoord_t *xyz)
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
bool getMachineExtent(int *nxyz) const
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
bool getMyActualMachineCoordinate(pcoord_t *xyz)
MachineClass Base class for representing machine coordinates, networks, etc.
Created by mbenlioglu on Aug 31, 2020.
SparseMatrixAdapter_t::part_t part_t