Zoltan2
Loading...
Searching...
No Matches
Zoltan2_MachineDragonflyRCA.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Zoltan2: A package of combinatorial algorithms for scientific computing
4//
5// Copyright 2012 NTESS and the Zoltan2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef _ZOLTAN2_MACHINE_DRAGONFLY_RCALIB_HPP_
11#define _ZOLTAN2_MACHINE_DRAGONFLY_RCALIB_HPP_
12
13#include <Teuchos_Comm.hpp>
14#include <Teuchos_CommHelpers.hpp>
15#include <Zoltan2_Machine.hpp>
16
17#ifdef HAVE_ZOLTAN2_RCALIB
18extern "C"{
19#include <rca_lib.h>
20}
21#endif
22
23namespace Zoltan2{
24
81template <typename pcoord_t, typename part_t>
82class MachineDragonflyRCA : public Machine <pcoord_t, part_t> {
83
84public:
85
93 MachineDragonflyRCA(const Teuchos::Comm<int> &comm):
94 Machine<pcoord_t,part_t>(comm),
95 transformed_networkDim(3),
96 actual_networkDim(3),
97 transformed_procCoords(NULL),
98 actual_procCoords(NULL),
99 transformed_machine_extent(NULL),
100 actual_machine_extent(NULL),
101 num_unique_groups(0),
102 group_count(NULL),
103 is_transformed(false),
104 pl(NULL) {
105
106 actual_machine_extent = new int[actual_networkDim];
107 this->getActualMachineExtent(this->actual_machine_extent);
108
109 // Number of ranks in each Dragonfly network group
110 // (i.e. RCA's X coord == Grp g)
111 group_count = new part_t[actual_machine_extent[0]];
112
113 memset(group_count, 0, sizeof(part_t) * actual_machine_extent[0]);
114
115 // Transformed dims = 1 + N_y + N_z
116 transformed_networkDim = 1 + actual_machine_extent[1] +
117 actual_machine_extent[2];
118 transformed_machine_extent = new int[transformed_networkDim];
119
120 // Allocate memory for processor coords
121 actual_procCoords = new pcoord_t *[actual_networkDim];
122 transformed_procCoords = new pcoord_t *[transformed_networkDim];
123
124 for (int i = 0; i < actual_networkDim; ++i) {
125 actual_procCoords[i] = new pcoord_t[this->numRanks];
126 memset(actual_procCoords[i], 0,
127 sizeof(pcoord_t) * this->numRanks);
128 }
129
130 pcoord_t *xyz = new pcoord_t[transformed_networkDim];
132 for (int i = 0; i < actual_networkDim; ++i)
133 actual_procCoords[i][this->myRank] = xyz[i];
134 delete [] xyz;
135
136 // Gather number of ranks in each Dragonfly network group from
137 // across all ranks
138 part_t * tmp_vec = new part_t[actual_machine_extent[0]];
139 memset(tmp_vec, 0, sizeof(part_t) * actual_machine_extent[0]);
140
141 Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
142 actual_machine_extent[0],
143 group_count,
144 tmp_vec);
145
146 // remove zero entries from reduced array
147 num_unique_groups = 0;
148
149 for (int i = 0; i < actual_machine_extent[0]; ++i) {
150 if (tmp_vec[i] > 0) {
151 ++num_unique_groups;
152 }
153 }
154
155 // Reset group_count array to new size
156 delete[] group_count;
157 group_count = new part_t[num_unique_groups];
158
159 int pos = 0;
160 for (int i = 0; i < actual_machine_extent[0]; ++i) {
161 if (tmp_vec[i] > 0) {
162 group_count[pos] = tmp_vec[i];
163 ++pos;
164 }
165 }
166
167 delete[] tmp_vec;
168
169 // reduceAll the coordinates of each processor.
170 gatherMachineCoordinates(this->actual_procCoords,
171 this->actual_networkDim, comm);
172 }
173
174 // No necessary wrap arounds for dragonfly networks. Groups
175 // have wrap around, but group all-to-all connection makes unneccessary.
176 virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
177 return false;
178 }
179
180
190 MachineDragonflyRCA(const Teuchos::Comm<int> &comm,
191 const Teuchos::ParameterList &pl_ ):
192 Machine<pcoord_t,part_t>(comm),
193 transformed_networkDim(3),
194 actual_networkDim(3),
195 transformed_procCoords(NULL),
196 actual_procCoords(NULL),
197 transformed_machine_extent(NULL),
198 actual_machine_extent(NULL),
199 num_unique_groups(0),
200 group_count(NULL),
201 is_transformed(false),
202 pl(&pl_)
203 {
204 actual_machine_extent = new int[actual_networkDim];
205 this->getActualMachineExtent(this->actual_machine_extent);
206
207 // Number of parts in each Group (i.e. RCA's X coord == Grp g)
208 group_count = new part_t[actual_machine_extent[0]];
209
210 memset(group_count, 0, sizeof(part_t) * actual_machine_extent[0]);
211
212 // Allocate memory for processor coords
213 actual_procCoords = new pcoord_t *[actual_networkDim];
214 transformed_procCoords = new pcoord_t *[transformed_networkDim];
215
216 pcoord_t *xyz = new pcoord_t[actual_networkDim];
218
219 // Gather number of ranks in each Dragonfly network group
220 // from across all ranks
221 part_t * tmp_vec = new part_t[actual_machine_extent[0]];
222 memset(tmp_vec, 0, sizeof(part_t) * actual_machine_extent[0]);
223
224 Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
225 actual_machine_extent[0],
226 group_count,
227 tmp_vec);
228
229 // Remove zero entries from reduced array
230 num_unique_groups = 0;
231
232 for (int i = 0; i < actual_machine_extent[0]; ++i) {
233 if (tmp_vec[i] > 0) {
234 ++num_unique_groups;
235 }
236 }
237
238 // Reset group_count array to new size
239 delete[] group_count;
240 group_count = new part_t[num_unique_groups];
241
242 int pos = 0;
243 for (int i = 0; i < actual_machine_extent[0]; ++i) {
244 if (tmp_vec[i] > 0) {
245 group_count[pos] = tmp_vec[i];
246 ++pos;
247 }
248 }
249 delete[] tmp_vec;
250
251 const Teuchos::ParameterEntry *pe2 =
252 this->pl->getEntryPtr("Machine_Optimization_Level");
253
254 // Transform with mach opt level
255 if (pe2) {
256 int optimization_level;
257 optimization_level = pe2->getValue<int>(&optimization_level);
258
259 if (optimization_level > 0) {
260 is_transformed = true;
261
262 // Transformed dims = 1 + N_y + N_z
263 transformed_networkDim = 1 + actual_machine_extent[1] +
264 actual_machine_extent[2];
265 transformed_machine_extent = new int[transformed_networkDim];
266
267 transformed_procCoords = new pcoord_t *[transformed_networkDim];
268
269 // Allocate memory for transformed coordinates
270 for (int i = 0; i < transformed_networkDim; ++i) {
271 transformed_procCoords[i] = new pcoord_t[this->numRanks];
272 memset(transformed_procCoords[i], 0,
273 sizeof(pcoord_t) * this->numRanks);
274 }
275
276 // Calculate transformed coordinates and machine extents
277 int nx = this->actual_machine_extent[0];
278 int ny = this->actual_machine_extent[1];
279 int nz = this->actual_machine_extent[2];
280
281 const Teuchos::ParameterEntry *pe_x =
282 this->pl->getEntryPtr("Machine_X_Stretch");
283 const Teuchos::ParameterEntry *pe_y =
284 this->pl->getEntryPtr("Machine_Y_Stretch");
285 const Teuchos::ParameterEntry *pe_z =
286 this->pl->getEntryPtr("Machine_Z_Stretch");
287
288 // Default X,Y,Z stretches
289 int x_stretch = 3;
290 int y_stretch = 2;
291 int z_stretch = 1;
292
293 if (pe_x)
294 x_stretch = pe_x->getValue<int>(&x_stretch);
295 if (pe_y)
296 y_stretch = pe_y->getValue<int>(&y_stretch);
297 if (pe_z)
298 z_stretch = pe_z->getValue<int>(&z_stretch);
299
300 // Transform X coords
301 transformed_procCoords[0][this->myRank] =
302 x_stretch * xyz[0] * ny * nz;
303
304 // Transform Y coords
305 for (int i = 1; i < 1 + ny; ++i) {
306 // Shift y-coord given a group, xyz[0];
307 transformed_procCoords[i][this->myRank] = 0;
308 // Increment in the dim where y-coord present
309 if (xyz[1] == i - 1)
310 transformed_procCoords[i][this->myRank] = y_stretch;
311 }
312 // Transform Z coords
313 for (int i = 1 + ny; i < transformed_networkDim; ++i) {
314 // Shift z-coord given a group, xyz[0];
315 transformed_procCoords[i][this->myRank] = 0;
316 // Increment in the dim where z-coord present
317 if (xyz[2] == i - (1 + ny))
318 transformed_procCoords[i][this->myRank] = z_stretch;
319 }
320
321 this->transformed_machine_extent = new int[transformed_networkDim];
322
323 // Maximum extents in shifted high dim coordinate system
324 this->transformed_machine_extent[0] = x_stretch * (nx - 1) * ny * nz;
325 for (int i = 1; i < 1 + ny; ++i) {
326 this->transformed_machine_extent[i] = y_stretch;
327 }
328 for (int i = 1 + ny; i < transformed_networkDim; ++i) {
329 this->transformed_machine_extent[i] = z_stretch;
330 }
331
332 // reduceAll the transformed coordinates of each processor.
333 gatherMachineCoordinates(this->transformed_procCoords,
334 this->transformed_networkDim, comm);
335
336 this->printAllocation();
337 }
338 }
339 // If no coordinate transformation, gather actual coords
340 if (!is_transformed) {
341
342 for (int i = 0; i < actual_networkDim; ++i) {
343 actual_procCoords[i] = new pcoord_t[this->numRanks];
344 memset(actual_procCoords[i], 0,
345 sizeof(pcoord_t) * this->numRanks);
346 }
347
348 for (int i = 0; i < actual_networkDim; ++i)
349 actual_procCoords[i][this->myRank] = xyz[i];
350
351 // reduceAll the actual coordinates of each processor
352 gatherMachineCoordinates(this->actual_procCoords,
353 this->actual_networkDim, comm);
354
355 this->printAllocation();
356 }
357 delete [] xyz;
358 }
359
360 // Destructor
362 if (is_transformed) {
363 is_transformed = false;
364 if (this->numRanks > 1) {
365 for (int i = 0; i < transformed_networkDim; ++i) {
366 delete [] transformed_procCoords[i];
367 }
368 }
369 delete [] transformed_machine_extent;
370 }
371 else {
372 if (this->numRanks > 1) {
373 for (int i = 0; i < actual_networkDim; ++i) {
374 delete [] actual_procCoords[i];
375 }
376 }
377 }
378
379 delete [] actual_procCoords;
380 delete [] transformed_procCoords;
381
382 delete [] actual_machine_extent;
383 delete [] group_count;
384 }
385
386 bool hasMachineCoordinates() const { return true; }
387
388 // Return dimensions of coords, transformed or actual
389 int getMachineDim() const {
390 if (is_transformed)
391 return this->transformed_networkDim;
392 else
393 return this->actual_networkDim;
394 }
395
396 // Return the transformed maximum machine extents
397 bool getTransformedMachineExtent(int *nxyz) const {
398 if (is_transformed) {
399 for (int dim = 0; dim < transformed_networkDim; ++dim)
400 nxyz[dim] = this->transformed_machine_extent[dim];
401
402 return true;
403 }
404 else
405 return false;
406 }
407
408 // Return the actual RCA maximum machine extents
409 bool getActualMachineExtent(int *nxyz) const {
410#if defined (HAVE_ZOLTAN2_RCALIB)
411 mesh_coord_t mxyz;
412 rca_get_max_dimension(&mxyz);
413
414 int dim = 0; // Example extents on Cori
415 nxyz[dim++] = mxyz.mesh_x + 1; // X - group [0, ~100]
416 nxyz[dim++] = mxyz.mesh_y + 1; // Y - row within group [0, 5]
417 nxyz[dim++] = mxyz.mesh_z + 1; // Z - col within row [0, 15]
418 return true;
419#else
420 return false;
421#endif
422 }
423
424 // Return machine extents, transformed or actual
425 bool getMachineExtent(int *nxyz) const {
426 if (is_transformed)
427 this->getTransformedMachineExtent(nxyz);
428 else
429 this->getActualMachineExtent(nxyz);
430
431 return true;
432 }
433
434 // Return number of groups (RCA X-dim) with allocated nodes
435 part_t getNumUniqueGroups() const override{
436 return this->num_unique_groups;
437 }
438
439 // Return number of ranks in each group (RCA X-dim) in an allocation
440 bool getGroupCount(part_t *grp_count) const override {
441
442 if (group_count != NULL) {
443 for (int i = 0; i < num_unique_groups; ++i) {
444 grp_count[i] = this->group_count[i];
445 }
446
447 return true;
448 }
449 else
450 return false;
451 }
452
453 // Print allocation coords and extents on rank 0, transformed or actual
455 if (this->myRank == 0) {
456 // Print transformed coordinates and extents
457 if (is_transformed) {
458 for (int i = 0; i < this->numRanks; ++i) {
459 std::cout << "Rank:" << i;
460 for (int j = 0; j < this->transformed_networkDim; ++j) {
461 std::cout << " " << transformed_procCoords[j][i];
462 }
463 std::cout << std::endl;
464 }
465
466 std::cout << std::endl << "Transformed Machine Extent: ";
467 for (int i = 0; i < this->transformed_networkDim; ++i) {
468 std::cout << " " << this->transformed_machine_extent[i];
469 }
470 std::cout << std::endl;
471 }
472 // Print actual coordinates and extents
473 else {
474 for (int i = 0; i < this->numRanks; ++i) {
475 std::cout << "Rank:" << i;
476 for (int j = 0; j < this->actual_networkDim; ++j) {
477 std::cout << " " << actual_procCoords[j][i];
478 }
479 std::cout << std::endl;
480 }
481
482 std::cout << std::endl << "Actual Machine Extent: ";
483 for (int i = 0; i < this->actual_networkDim; ++i) {
484 std::cout << " " << this->actual_machine_extent[i];
485 }
486 std::cout << std::endl;
487 }
488 }
489 }
490
491 // Return transformed coord for this rank
493 if (is_transformed) {
494 for (int i = 0; i < this->transformed_networkDim; ++i) {
495 xyz[i] = transformed_procCoords[i][this->myRank];
496 }
497
498 return true;
499 }
500 else
501 return false;
502 }
503
504 // Return actual RCA coord for this rank
505 bool getMyActualMachineCoordinate(pcoord_t *xyz) {
506#if defined (HAVE_ZOLTAN2_RCALIB)
507 // Cray node info for current node
508 rs_node_t nodeInfo;
509 rca_get_nodeid(&nodeInfo);
510
511 // Current node ID
512 int NIDs = (int)nodeInfo.rs_node_s._node_id;
513
514 mesh_coord_t node_coord;
515 int returnval = rca_get_meshcoord((uint16_t)NIDs, &node_coord);
516 if (returnval == -1) {
517 return false;
518 }
519
520 int x = node_coord.mesh_x;
521 int y = node_coord.mesh_y;
522 int z = node_coord.mesh_z;
523
524 xyz[0] = x;
525 xyz[1] = y;
526 xyz[2] = z;
527
528 group_count[x]++;
529
530 return true;
531#else
532 return false;
533#endif
534 }
535
536 // Return machine coordinate for this rank, transformed or actual
537 bool getMyMachineCoordinate(pcoord_t *xyz) {
538 if (is_transformed)
540 else
542
543 return true;
544 }
545
546 // Return machine coord of given rank, transformed or actual
547 inline bool getMachineCoordinate(const int rank,
548 pcoord_t *xyz) const {
549 if (is_transformed) {
550 for (int i = 0; i < this->transformed_networkDim; ++i) {
551 xyz[i] = transformed_procCoords[i][rank];
552 }
553 }
554 else {
555 for (int i = 0; i < this->actual_networkDim; ++i) {
556 xyz[i] = actual_procCoords[i][rank];
557 }
558 }
559
560 return true;
561 }
562
563 bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
564 return false; // cannot yet return from nodename
565 }
566
567 // Return view of all machine coords, transformed or actual
568 bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
569 if (is_transformed) {
570 allCoords = transformed_procCoords;
571 }
572 else {
573 allCoords = actual_procCoords;
574 }
575
576 return true;
577 }
578
579 // Return (approx) hop count from rank1 to rank2. Does not account for
580 // Dragonfly's dynamic routing.
581 virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override {
582 hops = 0;
583
584 if (is_transformed) {
585 // Case: ranks in different groups
586 // Does not account for location of group to group connection.
587 // (Most group to group messages will take 5 hops)
588 if (this->transformed_procCoords[0][rank1] !=
589 this->transformed_procCoords[0][rank2])
590 {
591 hops = 5;
592 return true;
593 }
594
595 // Case: ranks in same group
596 // For each 2 differences in transformed_coordinates then
597 // 1 hop
598 for (int i = 1; i < this->transformed_networkDim; ++i) {
599 if (this->transformed_procCoords[i][rank1] !=
600 this->transformed_procCoords[i][rank2])
601 ++hops;
602 }
603 hops /= 2;
604 }
605 else {
606 // Case: ranks in different groups
607 // Does not account for location of group to group connection.
608 // (Nearly all group to group messages will take 5 hops)
609 if (this->actual_procCoords[0][rank1] !=
610 this->actual_procCoords[0][rank2])
611 {
612 hops = 5;
613 return true;
614 }
615
616 // Case: ranks in same group
617 // For each difference in actual_coordinates then
618 // 1 hop
619 for (int i = 1; i < actual_networkDim; ++i) {
620 if (this->actual_procCoords[i][rank1] !=
621 this->actual_procCoords[i][rank2])
622 ++hops;
623 }
624 }
625
626 return true;
627 }
628
629private:
630
631 // # of dimensions in the stored coordinates, transformed or actual
632 int transformed_networkDim;
633 int actual_networkDim;
634
635 // Machine Coordinates
636 pcoord_t **transformed_procCoords;
637 pcoord_t **actual_procCoords;
638
639 // Maximum extents for each dimension, transformed or actual
640 part_t *transformed_machine_extent;
641 part_t *actual_machine_extent;
642
643 // Number of groups (RCA X-dim) with nonzero nodes allocated
644 part_t num_unique_groups;
645 // Distribution of nodes in each group (zero node groups have been trimmed)
646 part_t *group_count;
647
648 // Are our coordinates transformed?
649 bool is_transformed;
650
651 const Teuchos::ParameterList *pl;
652
653 // reduceAll the machine coordinates
654 void gatherMachineCoordinates(pcoord_t **&coords, int netDim,
655 const Teuchos::Comm<int> &comm) {
656 // Reduces and stores all machine coordinates.
657 pcoord_t *tmpVect = new pcoord_t [this->numRanks];
658
659 for (int i = 0; i < netDim; ++i) {
660 Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
661 this->numRanks,
662 coords[i], tmpVect);
663 pcoord_t *tmp = tmpVect;
664 tmpVect = coords[i];
665 coords[i] = tmp;
666 }
667 delete [] tmpVect;
668 }
669
670};
671
672} // namespace Zoltan2
673
674#endif
A Dragonfly (e.g. Cori, Trinity, & Theta) Machine Class for task mapping.
MachineDragonflyRCA(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
Constructor: Dragonfly (e.g. Cori & Trinity) network machine description.
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount function: sets hops to the hop count between rank1 and rank2; returns true if coordinates are available.
part_t getNumUniqueGroups() const override
getNumUniqueGroups function: returns the number of unique Dragonfly network groups in the provided allocation.
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
MachineDragonflyRCA(const Teuchos::Comm< int > &comm)
Constructor: Dragonfly (e.g. Cori & Trinity) network machine description.
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
bool getGroupCount(part_t *grp_count) const override
getGroupCount function: returns the number of ranks in each group (RCA X-dim, i.e. the first dimension).
MachineClass Base class for representing machine coordinates, networks, etc.
Created by mbenlioglu on Aug 31, 2020.
SparseMatrixAdapter_t::part_t part_t