Zoltan2
Loading...
Searching...
No Matches
Zoltan2_MachineTorusRCAForTesting.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Zoltan2: A package of combinatorial algorithms for scientific computing
4//
5// Copyright 2012 NTESS and the Zoltan2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef _ZOLTAN2_MACHINE_TORUS_RCALIBTEST_HPP_
11#define _ZOLTAN2_MACHINE_TORUS_RCALIBTEST_HPP_
12
13#include <Teuchos_Comm.hpp>
14#include <Teuchos_CommHelpers.hpp>
15#include <Zoltan2_Machine.hpp>
16
17#include <cstdlib> /* srand, rand */
18#include <fstream>
19#include <string>
20
21namespace Zoltan2{
22
27template <typename pcoord_t, typename part_t>
28class MachineTorusRCAForTesting : public Machine <pcoord_t, part_t> {
29
30public:
35 MachineTorusRCAForTesting(const Teuchos::Comm<int> &comm):
36 Machine<pcoord_t,part_t>(comm),
37 networkDim(3), actual_networkDim(3),
38 procCoords(NULL), actual_procCoords(NULL),
39 machine_extent(NULL),actual_machine_extent(NULL),
40 is_transformed(false), pl(NULL)
41 {
42 actual_machine_extent = machine_extent = new int[networkDim];
43 this->getRealMachineExtent(this->machine_extent);
44 actual_machine_extent = machine_extent;
45
46 // Allocate memory for processor coordinates.
47 actual_procCoords = procCoords = new pcoord_t *[networkDim];
48 for (int i = 0; i < networkDim; ++i) {
49 procCoords[i] = new pcoord_t[this->numRanks];
50 memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
51 }
52
53 // Obtain the coordinate of the processor.
54 pcoord_t *xyz = new pcoord_t[networkDim];
56 for (int i = 0; i < networkDim; i++)
57 procCoords[i][this->myRank] = xyz[i];
58 delete [] xyz;
59
60
61 // reduceAll the coordinates of each processor.
62 gatherMachineCoordinates(comm);
63 }
64
65 virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
66 int dim = 0;
67 int transformed_network_dim = networkDim;
68
69 if (dim < transformed_network_dim)
70 wrap_around[dim++] = true;
71 if (dim < transformed_network_dim)
72 wrap_around[dim++] = true;
73 if (dim < transformed_network_dim)
74 wrap_around[dim++] = true;
75 return true;
76 }
77
78 MachineTorusRCAForTesting(const Teuchos::Comm<int> &comm,
79 const Teuchos::ParameterList &pl_):
80 Machine<pcoord_t,part_t>(comm),
81 networkDim(3), actual_networkDim(3),
82 procCoords(NULL), actual_procCoords(NULL),
83 machine_extent(NULL),actual_machine_extent(NULL),
84 is_transformed(false), pl(&pl_)
85 {
86
87 actual_machine_extent = machine_extent = new int[networkDim];
88 this->getRealMachineExtent(this->machine_extent);
89 actual_machine_extent = machine_extent;
90
91 // Allocate memory for processor coordinates.
92 actual_procCoords = procCoords = new pcoord_t *[networkDim];
93
94
95 const Teuchos::ParameterEntry *pe1 =
96 this->pl->getEntryPtr("Input_RCA_Machine_Coords");
97 if (pe1) {
98 std::string input_coord_file;
99 input_coord_file = pe1->getValue<std::string>(&input_coord_file);
100 if (input_coord_file != "") {
101
102 if (this->myRank == 0) {
103 std::vector < std::vector <pcoord_t> > proc_coords(networkDim);
104 std::fstream machine_coord_file(input_coord_file.c_str());
105
106 part_t i = 0;
107 pcoord_t a,b, c;
108 machine_coord_file >> a >> b >> c;
109 while(!machine_coord_file.eof()) {
110 proc_coords[0].push_back(a);
111 proc_coords[1].push_back(b);
112 proc_coords[2].push_back(c);
113 ++i;
114 machine_coord_file >> a >> b >> c;
115 }
116
117 machine_coord_file.close();
118 std::cout << "Rewriting numprocs from:"
119 << this->numRanks << " to:" << i << std::endl;
120 this->numRanks = i;
121
122 for(int ii = 0; ii < networkDim; ++ii) {
123 procCoords[ii] = new pcoord_t[this->numRanks];
124 for (int j = 0; j < this->numRanks; ++j) {
125 procCoords[ii][j] = proc_coords[ii][j];
126 }
127 }
128 }
129 comm.broadcast(0, sizeof(int), (char *) &(this->numRanks));
130
131 if (this->myRank != 0) {
132 for (int i = 0; i < networkDim; ++i) {
133 procCoords[i] = new pcoord_t[this->numRanks];
134 memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
135 }
136 }
137 }
138 }
139 else {
140 for (int i = 0; i < networkDim; ++i) {
141 procCoords[i] = new pcoord_t[this->numRanks];
142 memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
143 }
144 // Obtain the coordinate of the processor.
145 pcoord_t *xyz = new pcoord_t[networkDim];
147 for (int i = 0; i < networkDim; i++)
148 procCoords[i][this->myRank] = xyz[i];
149 delete [] xyz;
150 }
151
152 // reduceAll the coordinates of each processor.
153 gatherMachineCoordinates(comm);
154
155 const Teuchos::ParameterEntry *pe2 =
156 this->pl->getEntryPtr("Machine_Optimization_Level");
157// this->printAllocation();
158 if (pe2) {
159 int optimization_level;
160 optimization_level = pe2->getValue<int>(&optimization_level);
161
162 if (optimization_level == 1) {
163 is_transformed = true;
164 this->networkDim = 3;
165 procCoords = new pcoord_t * [networkDim];
166 for(int i = 0; i < networkDim; ++i) {
167 procCoords[i] = new pcoord_t[this->numRanks] ;
168// this->proc_coords[permutation[i]];
169 }
170 for (int i = 0; i < this->numRanks; ++i) {
171 procCoords[0][i] = this->actual_procCoords[0][i] * 8;
172 int yordinal = this->actual_procCoords[1][i];
173 procCoords[1][i] = yordinal/2 * (16 + 8) + (yordinal %2) * 8;
174 int zordinal = this->actual_procCoords[2][i];
175 procCoords[2][i] = zordinal * 5 + (zordinal / 8) * 3;
176 }
177 int mx = this->machine_extent[0];
178 int my = this->machine_extent[1];
179 int mz = this->machine_extent[2];
180
181
182 this->machine_extent = new int[networkDim];
183 this->machine_extent[0] = mx * 8;
184 this->machine_extent[1] = my/2 * (16 + 8) + (my %2) * 8;
185 this->machine_extent[2] = mz * 5 + (mz / 8) * 3;
186 if(this->myRank == 0)
187 std::cout << "Transforming the coordinates" << std::endl;
188// this->printAllocation();
189 }
190 else if(optimization_level >= 3) {
191 is_transformed = true;
192 this->networkDim = 6;
193 procCoords = new pcoord_t * [networkDim];
194 for(int i = 0; i < networkDim; ++i) {
195 procCoords[i] = new pcoord_t[this->numRanks] ;
196// this->proc_coords[permutation[i]];
197 }
198
199// this->machine_extent[0] = this->actual_machine_extent
200 this->machine_extent = new int[networkDim];
201
202 this->machine_extent[0] =
203 ceil (int (this->actual_machine_extent[0]) / 2.0) * 64 ;
204 this->machine_extent[3] = 2 * 8 ;
205 this->machine_extent[1] =
206 ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
207 this->machine_extent[4] = 2 * 8;
208 this->machine_extent[2] =
209 ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
210 this->machine_extent[5] = 8 * 5;
211
212 for (int k = 0; k < this->numRanks ; k++) {
213 // This part is for titan.
214 // But it holds for other 3D torus machines such as Bluewaters.
215
216 // Bandwitdh along
217 // X = 75
218 // Y = 37.5 or 75 --- everyother has 37.5
219 // --- Y[0-1] =75 but Y[1-2]=37.5
220 // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
221
222 // Along X we make groups of 2. Then scale the distance with 64.
223 // First dimension is represents x/2
224 procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 2) * 64;
225 // Then the 3rd dimension is x%2. distance is scaled with 8,
226 // reversely proportional with bw=75
227 procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 2) * 8 ;
228
229 // Along Y. Every other one has the slowest link. So we want
230 // distances between Y/2 huge.
231 // We scale Y/2 with 2400 so that we make sure that it is the
232 // first one we divie.
233 procCoords[1][k] =
234 (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
235 // The other one is scaled with 8 as in X.
236 procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
237
238 // We make groups of 8 along Z. Then distances between these
239 // groups are scaled with 160.
240 // So that it is more than 2x distance than the distance with X
241 // grouping.
242 // That is we scale the groups of Zs with 160. Groups of X with 64.
243 // Zs has 8 processors connecting them, while X has only one. We
244 // want to divide along Z twice before dividing along X.
245 procCoords[2][k] =
246 ((int (this->actual_procCoords[2][k])) / 8) * 160;
247 // In the second group everything is scaled with 5, as bw=120
248 procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
249 }
250 }
251 else if(optimization_level == 2) {
252 // This is as above case. but we make groups of 3 along X instead.
253 is_transformed = true;
254 this->networkDim = 6;
255 procCoords = new pcoord_t * [networkDim];
256 for(int i = 0; i < networkDim; ++i) {
257 procCoords[i] = new pcoord_t[this->numRanks] ;
258// this->proc_coords[permutation[i]];
259 }
260
261// this->machine_extent[0] = this->actual_machine_extent
262 this->machine_extent = new int[networkDim];
263
264 this->machine_extent[0] =
265 ceil(int (this->actual_machine_extent[0]) / 3.0) * 128 ;
266 this->machine_extent[3] = 3 * 8 ;
267 this->machine_extent[1] =
268 ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
269 this->machine_extent[4] = 2 * 8;
270 this->machine_extent[2] =
271 ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
272 this->machine_extent[5] = 8 * 5;
273
274
275 for (int k = 0; k < this->numRanks ; k++) {
276 // This part is for titan.
277 // But it holds for other 3D torus machines such as Bluewaters.
278
279 // Bandwitdh along
280 // X = 75
281 // Y = 37.5 or 75 --- everyother has 37.5
282 // --- Y[0-1] =75 but Y[1-2]=37.5
283 // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
284
285 // In this case we make groups of 3. along X.
286 procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 3) * 128;
287 // Then the 3rd dimension is x%2. distance is scaled with 8,
288 // reversely proportional with bw=75
289 procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 3) * 8 ;
290
291 // Along Y. Every other one has the slowest link. So we want
292 // distances between Y/2 huge.
293 // We scale Y/2 with 2400 so that we make sure that it is the
294 // first one we divie.
295 procCoords[1][k] =
296 (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
297 // The other one is scaled with 8 as in X.
298 procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
299
300
301 procCoords[2][k] =
302 ((int (this->actual_procCoords[2][k])) / 8) * 160;
303 // In the second group everything is scaled with 5, as bw=120
304 procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
305 }
306 }
307 }
308 }
309
311 if (is_transformed) {
312 is_transformed = false;
313 for (int i = 0; i < actual_networkDim; i++) {
314 delete [] actual_procCoords[i];
315 }
316 delete [] actual_procCoords;
317 delete [] actual_machine_extent;
318 }
319 for (int i = 0; i < networkDim; i++) {
320 delete [] procCoords[i];
321 }
322 delete [] procCoords;
323 delete [] machine_extent;
324 }
325
326 bool hasMachineCoordinates() const { return true; }
327
328 int getMachineDim() const { return this->networkDim; }
329 int getRealMachineDim() const { return this->actual_networkDim; }
330
331 bool getMachineExtent(int *nxyz) const {
332 if (is_transformed) {
333 return false;
334 }
335 else {
336 int dim = 0;
337 nxyz[dim++] = this->machine_extent[0]; //x
338 nxyz[dim++] = this->machine_extent[1]; //y
339 nxyz[dim++] = this->machine_extent[2]; //z
340 return true;
341 }
342 }
343
344 bool getRealMachineExtent(int *nxyz) const {
345 int dim = 0;
346 nxyz[dim++] = 25; //x
347 nxyz[dim++] = 16; //y
348 nxyz[dim++] = 24; //z
349 return true;
350 }
351
352
354 if(this->myRank == 0) {
355 for (int i = 0; i < this->numRanks; ++i) {
356 std::cout << "Rank:" << i
357 << " " << procCoords[0][i]
358 << " " << procCoords[1][i]
359 << " " << procCoords[2][i] << std::endl;
360 }
361 std::cout << "Machine Extent:"
362 << " " << this->machine_extent[0]
363 << " " << this->machine_extent[1]
364 << " " << this->machine_extent[2] << std::endl;
365 }
366 }
367
368 bool getMyMachineCoordinate(pcoord_t *xyz) {
369 for (int i = 0; i < this->networkDim; ++i) {
370 xyz[i] = procCoords[i][this->myRank];
371 }
372 return true;
373 }
374
375 bool getMyActualMachineCoordinate(pcoord_t *xyz) {
376 xyz[0] = rand() % 25;
377 xyz[1] = rand() % 16;
378 xyz[2] = rand() % 24;
379 return true;
380 }
381
382 inline bool getMachineCoordinate(const int rank,
383 pcoord_t *xyz) const {
384 for (int i = 0; i < this->networkDim; ++i) {
385 xyz[i] = procCoords[i][rank];
386 }
387 return true;
388 }
389
390
391 bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
392 return false; // cannot yet return from nodename
393 }
394
395 bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
396 allCoords = procCoords;
397 return true;
398 }
399
400 virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override {
401 hops = 0;
402 for (int i = 0; i < networkDim; ++i) {
403 pcoord_t distance = procCoords[i][rank1] - procCoords[i][rank2];
404 if (distance < 0)
405 distance = -distance;
406 if (machine_extent[i] - distance < distance)
407 distance = machine_extent[i] - distance;
408 hops += distance;
409 }
410 return true;
411 }
412
413
414private:
415
416 int networkDim;
417 int actual_networkDim;
418
419 pcoord_t **procCoords;
420 pcoord_t **actual_procCoords;
421
422 part_t *machine_extent;
423 part_t *actual_machine_extent;
424 bool is_transformed;
425
426
427 const Teuchos::ParameterList *pl;
428
429/*
430 bool delete_transformed_coords;
431 int transformed_network_dim;
432 pcoord_t **transformed_coordinates;
433*/
434
435 void gatherMachineCoordinates(const Teuchos::Comm<int> &comm) {
436 // reduces and stores all machine coordinates.
437 pcoord_t *tmpVect = new pcoord_t [this->numRanks];
438
439 for (int i = 0; i < networkDim; i++) {
440 Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
441 this->numRanks,
442 procCoords[i], tmpVect);
443 pcoord_t *tmp = tmpVect;
444 tmpVect = procCoords[i];
445 procCoords[i] = tmp;
446 }
447 delete [] tmpVect;
448 }
449
450};
451
452} // namespace Zoltan2
453#endif
An RCA Machine Class (Torus Networks) for testing only. A more realistic machine should be used for ta...
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
MachineTorusRCAForTesting(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount function: sets hops to the hop count between rank1 and rank2; returns true if coordinates are available.
MachineTorusRCAForTesting(const Teuchos::Comm< int > &comm)
Constructor: A BlueGeneQ network machine description.
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
MachineClass Base class for representing machine coordinates, networks, etc.
Created by mbenlioglu on Aug 31, 2020.
SparseMatrixAdapter_t::part_t part_t