Zoltan2
Loading...
Searching...
No Matches
Zoltan2_MachineDragonflyRCAForTesting.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Zoltan2: A package of combinatorial algorithms for scientific computing
4//
5// Copyright 2012 NTESS and the Zoltan2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef _ZOLTAN2_MACHINE_DRAGONFLY_RCALIBTEST_HPP_
11#define _ZOLTAN2_MACHINE_DRAGONFLY_RCALIBTEST_HPP_
12
13#include <Teuchos_Comm.hpp>
14#include <Teuchos_CommHelpers.hpp>
15#include <Zoltan2_Machine.hpp>
16
17#include <cstdlib> /* srand, rand */
18#include <fstream>
19#include <string>
20
21namespace Zoltan2{
22
30template <typename pcoord_t, typename part_t>
31class MachineDragonflyRCAForTesting : public Machine <pcoord_t, part_t> {
32
33public:
42 MachineDragonflyRCAForTesting(const Teuchos::Comm<int> &comm):
43 Machine<pcoord_t,part_t>(comm),
44 transformed_networkDim(3),
45 actual_networkDim(3),
46 transformed_procCoords(NULL),
47 actual_procCoords(NULL),
48 transformed_machine_extent(NULL),
49 actual_machine_extent(NULL),
50 num_unique_groups(0),
51 group_count(NULL),
52 is_transformed(false),
53 pl(NULL) {
54
55 actual_machine_extent = new int[actual_networkDim];
56 this->getActualMachineExtent(this->actual_machine_extent);
57
58 // Number of ranks in each Dragonfly network group
59 // (i.e. RCA's X coord == Grp g)
60 group_count = new part_t[actual_machine_extent[0]];
61
62 memset(group_count, 0, sizeof(part_t) * actual_machine_extent[0]);
63
64 // Transformed dims = 1 + N_y + N_z
65 transformed_networkDim = 1 + actual_machine_extent[1] +
66 actual_machine_extent[2];
67 transformed_machine_extent = new int[transformed_networkDim];
68
69 // Allocate memory for processor coords
70 actual_procCoords = new pcoord_t *[actual_networkDim];
71 transformed_procCoords = new pcoord_t *[transformed_networkDim];
72
73 for (int i = 0; i < actual_networkDim; ++i) {
74 actual_procCoords[i] = new pcoord_t[this->numRanks];
75 memset(actual_procCoords[i], 0,
76 sizeof(pcoord_t) * this->numRanks);
77 }
78
79 pcoord_t *xyz = new pcoord_t[transformed_networkDim];
81 for (int i = 0; i < actual_networkDim; ++i)
82 actual_procCoords[i][this->myRank] = xyz[i];
83 delete [] xyz;
84
85 // Gather number of ranks in each Dragonfly network group
86 // from across all ranks
87 part_t *tmp_vec = new part_t[actual_machine_extent[0]];
88 memset(tmp_vec, 0, sizeof(part_t) * actual_machine_extent[0]);
89
90 Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
91 actual_machine_extent[0],
92 group_count,
93 tmp_vec);
94
95 // remove zero entries from reduced array
96 num_unique_groups = 0;
97
98 for (int i = 0; i < actual_machine_extent[0]; ++i) {
99 if (tmp_vec[i] > 0) {
100 ++num_unique_groups;
101 }
102 }
103
104 // Reset group_count array to new size
105 delete[] group_count;
106 group_count = new part_t[num_unique_groups];
107
108 int pos = 0;
109 for (int i = 0; i < actual_machine_extent[0]; ++i) {
110 if (tmp_vec[i] > 0) {
111 group_count[pos] = tmp_vec[i];
112 ++pos;
113 }
114 }
115
116 delete[] tmp_vec;
117
118 // reduceAll the coordinates of each processor.
119 gatherMachineCoordinates(this->actual_procCoords,
120 this->actual_networkDim, comm);
121 }
122
123 // No necessary wrap arounds for dragonfly networks. Groups
124 // have wrap around, but group all-to-all connection makes unneccessary.
125 virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
126 return false;
127 }
128
129
139 MachineDragonflyRCAForTesting(const Teuchos::Comm<int> &comm,
140 const Teuchos::ParameterList &pl_ ):
141 Machine<pcoord_t,part_t>(comm),
142 transformed_networkDim(3),
143 actual_networkDim(3),
144 transformed_procCoords(NULL),
145 actual_procCoords(NULL),
146 transformed_machine_extent(NULL),
147 actual_machine_extent(NULL),
148 num_unique_groups(0),
149 group_count(NULL),
150 is_transformed(false),
151 pl(&pl_) {
152
153 actual_machine_extent = new int[actual_networkDim];
154 this->getActualMachineExtent(this->actual_machine_extent);
155
156 // Number of parts in each Dragonfly network group
157 // (i.e. RCA's X coord == Grp g)
158 group_count = new part_t[actual_machine_extent[0]];
159
160 memset(group_count, 0, sizeof(part_t) * actual_machine_extent[0]);
161
162 // Allocate memory for processor coords
163 actual_procCoords = new pcoord_t *[actual_networkDim];
164 transformed_procCoords = new pcoord_t *[transformed_networkDim];
165
166 pcoord_t *xyz = new pcoord_t[actual_networkDim];
168
169 // Gather number of ranks in each Dragonfly network group
170 // from across all ranks
171 part_t *tmp_vec = new part_t[actual_machine_extent[0]];
172 memset(tmp_vec, 0, sizeof(part_t) * actual_machine_extent[0]);
173
174 Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
175 actual_machine_extent[0],
176 group_count,
177 tmp_vec);
178
179 // remove zero entries from reduced vector
180 num_unique_groups = 0;
181
182 for (int i = 0; i < actual_machine_extent[0]; ++i) {
183 if (tmp_vec[i] > 0) {
184 ++num_unique_groups;
185 }
186 }
187
188 // Reset group_count array to new size (# of nonzero groups)
189 delete[] group_count;
190 group_count = new part_t[num_unique_groups];
191
192 int pos = 0;
193 for (int i = 0; i < actual_machine_extent[0]; ++i)
194 {
195 if (tmp_vec[i] > 0) {
196 group_count[pos] = tmp_vec[i];
197 ++pos;
198 }
199 }
200 delete[] tmp_vec;
201
202 const Teuchos::ParameterEntry *pe2 =
203 this->pl->getEntryPtr("Machine_Optimization_Level");
204
205 // Transform with mach opt level
206 if (pe2) {
207 int optimization_level;
208 optimization_level = pe2->getValue<int>(&optimization_level);
209
210 if (optimization_level > 0) {
211 is_transformed = true;
212
213 // Transformed dims = 1 + N_y + N_z
214 transformed_networkDim = 1 + actual_machine_extent[1] +
215 actual_machine_extent[2];
216 transformed_machine_extent = new int[transformed_networkDim];
217
218 transformed_procCoords = new pcoord_t *[transformed_networkDim];
219
220 // Allocate memory for transformed coordinates
221 for (int i = 0; i < transformed_networkDim; ++i) {
222 transformed_procCoords[i] = new pcoord_t[this->numRanks];
223 memset(transformed_procCoords[i], 0,
224 sizeof(pcoord_t) * this->numRanks);
225 }
226
227 // Calculate transformed coordinates and machine extents
228 int nx = this->actual_machine_extent[0];
229 int ny = this->actual_machine_extent[1];
230 int nz = this->actual_machine_extent[2];
231
232 const Teuchos::ParameterEntry *pe_x =
233 this->pl->getEntryPtr("Machine_X_Stretch");
234 const Teuchos::ParameterEntry *pe_y =
235 this->pl->getEntryPtr("Machine_Y_Stretch");
236 const Teuchos::ParameterEntry *pe_z =
237 this->pl->getEntryPtr("Machine_Z_Stretch");
238
239 // Default X,Y,Z stretches
240 int x_stretch = 3;
241 int y_stretch = 2;
242 int z_stretch = 1;
243
244 if (pe_x)
245 x_stretch = pe_x->getValue<int>(&x_stretch);
246 if (pe_y)
247 y_stretch = pe_y->getValue<int>(&y_stretch);
248 if (pe_x)
249 z_stretch = pe_z->getValue<int>(&z_stretch);
250
251 // Transform X coords
252 transformed_procCoords[0][this->myRank] =
253 x_stretch * xyz[0] * ny * nz;
254
255 // Transform Y coords
256 for (int i = 1; i < 1 + ny; ++i) {
257 // Shift y-coord given a group, xyz[0];
258 transformed_procCoords[i][this->myRank] = 0;
259 // Increment in the dim where y-coord present
260 if (xyz[1] == i - 1) {
261 transformed_procCoords[i][this->myRank] = y_stretch;
262 }
263 }
264 // Transform Z coords
265 for (int i = 1 + ny; i < transformed_networkDim; ++i) {
266 // Shift z-coord given a group, xyz[0];
267 transformed_procCoords[i][this->myRank] = 0;
268 // Increment in the dim where z-coord present
269 if (xyz[2] == i - (1 + ny))
270 transformed_procCoords[i][this->myRank] = z_stretch;
271 }
272
273 this->transformed_machine_extent = new int[transformed_networkDim];
274
275 // Maximum extents in shifted high dim coordinate system
276 this->transformed_machine_extent[0] = x_stretch * (nx - 1) * ny * nz;
277 for (int i = 1; i < 1 + ny; ++i) {
278 this->transformed_machine_extent[i] = y_stretch;
279 }
280 for (int i = 1 + ny; i < transformed_networkDim; ++i) {
281 this->transformed_machine_extent[i] = z_stretch;
282 }
283
284 // reduceAll the transformed coordinates of each processor.
285 gatherMachineCoordinates(this->transformed_procCoords,
286 this->transformed_networkDim, comm);
287
288 this->printAllocation();
289 }
290 }
291 // If no coordinate transformation, gather actual coords
292 if (!is_transformed) {
293
294 for (int i = 0; i < actual_networkDim; ++i) {
295 actual_procCoords[i] = new pcoord_t[this->numRanks];
296 memset(actual_procCoords[i], 0,
297 sizeof(pcoord_t) * this->numRanks);
298 }
299
300 for (int i = 0; i < actual_networkDim; ++i)
301 actual_procCoords[i][this->myRank] = xyz[i];
302
303 // reduceAll the actual coordinates of each processor
304 gatherMachineCoordinates(this->actual_procCoords,
305 this->actual_networkDim, comm);
306
307 this->printAllocation();
308 }
309 delete [] xyz;
310 }
311
312 // Destructor
314 if (is_transformed) {
315 is_transformed = false;
316 if (this->numRanks > 1) {
317 for (int i = 0; i < transformed_networkDim; ++i) {
318 delete [] transformed_procCoords[i];
319 }
320 }
321 delete [] transformed_machine_extent;
322 }
323 else {
324 if (this->numRanks > 1) {
325 for (int i = 0; i < actual_networkDim; ++i) {
326 delete [] actual_procCoords[i];
327 }
328 }
329 }
330
331 delete [] actual_procCoords;
332 delete [] transformed_procCoords;
333
334 delete [] actual_machine_extent;
335 delete [] group_count;
336 }
337
338 bool hasMachineCoordinates() const { return true; }
339
340 // Return dimensions of coords, transformed or actual
341 int getMachineDim() const {
342 if (is_transformed)
343 return this->transformed_networkDim;
344 else
345 return this->actual_networkDim;
346 }
347
348 // Return the transformed maximum machine extents
349 bool getTransformedMachineExtent(int *nxyz) const {
350 if (is_transformed) {
351 for (int dim = 0; dim < transformed_networkDim; ++dim)
352 nxyz[dim] = this->transformed_machine_extent[dim];
353
354 return true;
355 }
356 else
357 return false;
358 }
359
360 // Return the fake "RCA" machine extents for testing
361 bool getActualMachineExtent(int *nxyz) const {
362/*
363#if defined (HAVE_ZOLTAN2_RCALIB)
364 mesh_coord_t mxyz;
365 rca_get_max_dimension(&mxyz);
366
367 int dim = 0;
368 nxyz[dim++] = mxyz.mesh_x + 1; // X - group [0, ~100]
369 nxyz[dim++] = mxyz.mesh_y + 1; // Y - row within group [0, 5]
370 nxyz[dim++] = mxyz.mesh_z + 1; // Z - col within row [0, 15]
371 return true;
372#else
373 return false;
374#endif
375*/
376
377 nxyz[0] = 11; // X - group
378 nxyz[1] = 6; // Y - row within group
379 nxyz[2] = 16; // Z - col within group
380
381 // Needed for test/unit_test/Machine.cpp PASS
382// nxyz[0] = 4;
383// nxyz[1] = 8;
384// nxyz[2] = 12;
385
386 return true;
387 }
388
389 // Return machine extents, transformed or actual
390 bool getMachineExtent(int *nxyz) const {
391 if (is_transformed)
392 this->getTransformedMachineExtent(nxyz);
393 else
394 this->getActualMachineExtent(nxyz);
395
396 return true;
397 }
398
399 // Return number of groups (RCA X-dim) with allocated nodes
400 part_t getNumUniqueGroups() const override{
401 return this->num_unique_groups;
402 }
403
404 // Return number of ranks in each group (RCA X-dim) in an allocation
405 bool getGroupCount(part_t *grp_count) const override {
406
407 if (group_count != NULL) {
408 for (int i = 0; i < num_unique_groups; ++i) {
409 grp_count[i] = this->group_count[i];
410 }
411
412 return true;
413 }
414 else
415 return false;
416 }
417
418 // Print allocation coords and extents on rank 0, transformed or actual
420 if (this->myRank >= 0) {
421 // Print transformed coordinates and extents
422 if (is_transformed) {
423 for (int i = 0; i < this->numRanks; ++i) {
424 std::cout << "Rank:" << i << " ";
425 for (int j = 0; j < this->transformed_networkDim; ++j) {
426 std::cout << " " << this->transformed_procCoords[j][i];
427 }
428 std::cout << std::endl;
429 }
430
431 std::cout << std::endl << "Transformed Machine Extent: ";
432 for (int i = 0; i < this->transformed_networkDim; ++i) {
433 std::cout << " " << this->transformed_machine_extent[i];
434 }
435 std::cout << std::endl;
436 }
437 // Print actual coordinates and extents
438 else {
439 for (int i = 0; i < this->numRanks; ++i) {
440 std::cout << "Rank:" << i;
441 for (int j = 0; j < this->actual_networkDim; ++j) {
442 std::cout << " " << actual_procCoords[j][i];
443 }
444 std::cout << std::endl;
445 }
446
447 std::cout << std::endl << "Actual Machine Extent: ";
448 for (int i = 0; i < this->actual_networkDim; ++i) {
449 std::cout << " " << this->actual_machine_extent[i];
450 }
451 std::cout << std::endl;
452 }
453 }
454 }
455
456 // Return the transformed coordinate of this rank.
// NOTE(review): the signature line of this function is absent from this
// listing; from the body it takes an output array `xyz` and returns bool --
// confirm name and qualifiers against the upstream header.
// When is_transformed is set, copies transformed_procCoords[i][myRank] for
// each of the transformed_networkDim dimensions into xyz and returns true;
// otherwise xyz is left untouched and false is returned.
458 if (is_transformed) {
459 for (int i = 0; i < this->transformed_networkDim; ++i) {
460 xyz[i] = transformed_procCoords[i][this->myRank];
461 }
462
463 return true;
464 }
465 else
466 return false;
467 }
468
469 // Return the fake "RCA" coord for this rank for testing
470 bool getMyActualMachineCoordinate(pcoord_t *xyz) {
471/*
472#if defined (HAVE_ZOLTAN2_RCALIB)
473 // Cray node info for current node
474 rs_node_t nodeInfo;
475 rca_get_nodeid(&nodeInfo);
476
477 // Current node ID
478 int NIDs = (int)nodeInfo.rs_node_s._node_id;
479
480 mesh_coord_t node_coord;
481 int returnval = rca_get_meshcoord((uint16_t)NIDs, &node_coord);
482 if (returnval == -1) {
483 return false;
484 }
485 xyz[0] = node_coord.mesh_x;
486 xyz[1] = node_coord.mesh_y;
487 xyz[2] = node_coord.mesh_z;
488 return true;
489#else
490 return false;
491#endif
492*/
493 srand(this->myRank);
494
495 int x = rand() % 11;
496 int y = rand() % 6;
497 int z = rand() % 16;
498
499 xyz[0] = x;
500 xyz[1] = y;
501 xyz[2] = z;
502
503 // Needed for test/unit_test/Machine.cpp PASS
504// xyz[0] = this->myRank;
505// xyz[1] = this->numRanks;
506// xyz[2] = this->numRanks + 1;
507
508 group_count[x]++;
509
510 return true;
511 }
512
513 // Return machine coordinate for this rank, transformed or actual
// NOTE(review): the statements belonging to the `if` and `else` branches
// (listing lines 516 and 518) are missing from this listing, so the
// branches appear empty here. Presumably each branch fills xyz from the
// matching per-rank getter -- restore from the upstream header before
// building.
514 bool getMyMachineCoordinate(pcoord_t *xyz) {
515 if (is_transformed)
517 else
519
520 return true;
521 }
522
523 // Return machine coord of given rank, transformed or actual
524 inline bool getMachineCoordinate(const int rank,
525 pcoord_t *xyz) const {
526 if (is_transformed) {
527 for (int i = 0; i < this->transformed_networkDim; ++i) {
528 xyz[i] = transformed_procCoords[i][rank];
529 }
530 }
531 else {
532 for (int i = 0; i < this->actual_networkDim; ++i) {
533 xyz[i] = actual_procCoords[i][rank];
534 }
535 }
536
537 return true;
538 }
539
540 bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
541 return false; // cannot yet return from nodename
542 }
543
544 // Return view of all machine coords, transformed or actual
545 bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
546 if (is_transformed) {
547 allCoords = transformed_procCoords;
548 }
549 else {
550 allCoords = actual_procCoords;
551 }
552
553 return true;
554 }
555
556 // Return (approx) hop count from rank1 to rank2. Does not account for
557 // Dragonfly's dynamic routing.
558 virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override {
559 hops = 0;
560
561 if (rank1 == rank2)
562 return true;
563 if (rank1 >= this->numRanks || rank2 >= this->numRanks) {
564 std::cerr << "Rank outside bounds for the machine ranks";
565 exit(1);
566 }
567
568 if (this->is_transformed) {
569 // Case: ranks in different groups (i.e. different RCA x-coords)
570 // Does not account for location of group to group connection.
571 // (Most group to group messages will take 5 hops)
572 if (this->transformed_procCoords[0][rank1] !=
573 this->transformed_procCoords[0][rank2])
574 {
575 hops = 5;
576
577 return true;
578 }
579
580 // Case: ranks in same group
581 // For each 2 differences in transformed_coordinates then
582 // 1 hop
583 for (int i = 1; i < this->transformed_networkDim; ++i) {
584 if (this->transformed_procCoords[i][rank1] !=
585 this->transformed_procCoords[i][rank2])
586 ++hops;
587 }
588 hops /= 2;
589 }
590 else {
591 // Case: ranks in different groups
592 // Does not account for location of group to group connection.
593 // (Nearly all group to group messages will take 5 hops)
594 if (this->actual_procCoords[0][rank1] !=
595 this->actual_procCoords[0][rank2])
596 {
597 hops = 5;
598 return true;
599 }
600
601 // Case: ranks in same group
602 // For each difference in actual_coordinates then
603 // 1 hop
604 for (int i = 1; i < this->actual_networkDim; ++i) {
605 if (this->actual_procCoords[i][rank1] !=
606 this->actual_procCoords[i][rank2])
607 ++hops;
608 }
609 }
610
611 return true;
612 }
613
614private:
615
616 // # of dimensions in the stored coordinates, transformed or actual
617 int transformed_networkDim;
618 int actual_networkDim;
619
620 // Machine Coordinates
621 pcoord_t **transformed_procCoords;
622 pcoord_t **actual_procCoords;
623
624 // Maximum extents for each dimension, transformed or actual
625 part_t *transformed_machine_extent;
626 part_t *actual_machine_extent;
627
628 // Number of groups (RCA X-dim) with nonzero nodes allocated
629 part_t num_unique_groups;
630 // Distribution of nodes in each group (zero node groups have been trimmed)
631 part_t *group_count;
632
633 // Are out coordinates transformed?
634 bool is_transformed;
635
636 const Teuchos::ParameterList *pl;
637
638
639 // reduceAll the machine coordinates
640 void gatherMachineCoordinates(pcoord_t **&coords, int netDim,
641 const Teuchos::Comm<int> &comm) {
642 // Reduces and stores all machine coordinates.
643 pcoord_t *tmpVect = new pcoord_t [this->numRanks];
644
645 for (int i = 0; i < netDim; ++i) {
646 Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
647 this->numRanks,
648 coords[i], tmpVect);
649 pcoord_t *tmp = tmpVect;
650 tmpVect = coords[i];
651 coords[i] = tmp;
652 }
653 delete [] tmpVect;
654 }
655
656};
657
658} // namespace Zoltan2
659
660#endif
A Dragonfly (e.g. Cori, Trinity, Theta) Machine Class for testing only. A more realistic machine shou...
MachineDragonflyRCAForTesting(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
Constructor: Dragonfly (e.g. Cori & Trinity) network machine description;.
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount function set hops between rank1 and rank2 return true if coordinates are available
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
part_t getNumUniqueGroups() const override
getNumUniqueGroups function return the number of unique Dragonfly network groups in provided allocati...
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
bool getGroupCount(part_t *grp_count) const override
getGroupCount function return the number of ranks in each group (RCA X-dim, e.g. first dim)
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
MachineDragonflyRCAForTesting(const Teuchos::Comm< int > &comm)
Constructor: Dragonfly (e.g. Cori & Trinity) RCA network machine description;.
MachineClass Base class for representing machine coordinates, networks, etc.
Created by mbenlioglu on Aug 31, 2020.
SparseMatrixAdapter_t::part_t part_t