Zoltan2_GraphMetricsUtility.hpp
// @HEADER
// *****************************************************************************
// Zoltan2: A package of combinatorial algorithms for scientific computing
//
// Copyright 2012 NTESS and the Zoltan2 contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#ifndef ZOLTAN2_GRAPHICMETRICVALUESUTILITY_HPP
#define ZOLTAN2_GRAPHICMETRICVALUESUTILITY_HPP

// NOTE: the include list in this listing is truncated. The declarations used
// below (GraphModel, GraphMetrics, MachineRepresentation, the Zoltan2
// directory classes) come from additional Zoltan2 headers; the includes added
// here are an assumption, restored so the file reads as a complete unit.
#include <Zoltan2_GraphModel.hpp>
#include <Zoltan2_GraphMetrics.hpp>
#include <Zoltan2_MachineRepresentation.hpp>
#include <Zoltan2_Directory_Impl.hpp>
#include <zoltan_dd.h>
#include <Zoltan2_TPLTraits.hpp>
#include <map>
#include <vector>

namespace Zoltan2 {

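/*! \brief Given the local partitioning, compute the global weighted cuts in
 *  each part.
 *
 *  (Reconstructed summary; the original documentation block was dropped from
 *  this listing.)
 *
 *  \param env        environment for error reporting, timing and debug output
 *  \param comm       communicator for the problem
 *  \param graph      the graph model
 *  \param parts      parts[i] is the part of local vertex i
 *  \param numParts   the global number of parts
 *  \param metrics    on return, the computed graph metrics ("edge cuts",
 *                    "message", per-weight cuts, and hop metrics when a
 *                    machine model is supplied) are appended to this list
 *  \param globalSums storage for global metric sums (not referenced in the
 *                    code shown in this listing)
 *  \param bMessages  not referenced in the code shown in this listing
 *  \param machine    optional machine model; when non-null, hop and weighted
 *                    hop metrics are also computed
 *
 *  A minimal calling sketch, assuming an adapter type and already-built
 *  model/solution objects (the names below are illustrative only):
 *  \code
 *  // edge cuts and message counts only
 *  globalWeightedByPart<adapter_t>(env, comm, graphModel, partView,
 *                                  numParts, metrics, globalSums);
 *
 *  // additionally hops and weighted hops, using a machine model
 *  globalWeightedByPart<adapter_t, machine_t>(env, comm, graphModel, partView,
 *                                             numParts, metrics, globalSums,
 *                                             true, machine);
 *  \endcode
 */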
template <typename Adapter,
          typename MachineRep =   // Default MachineRep type
                   MachineRepresentation<typename Adapter::scalar_t,
                                         typename Adapter::part_t> >
void globalWeightedByPart(
    const RCP<const Environment> &env,
    const RCP<const Comm<int> > &comm,
    const RCP<const GraphModel<typename Adapter::base_adapter_t> > &graph,
    const ArrayView<const typename Adapter::part_t> &parts,
    typename Adapter::part_t &numParts,
    ArrayRCP<RCP<BaseClassMetrics<typename Adapter::scalar_t> > > &metrics,
    ArrayRCP<typename Adapter::scalar_t> &globalSums,
    bool bMessages = true,
    const RCP<const MachineRep> machine = Teuchos::null) {

  env->timerStart(MACRO_TIMERS, "globalWeightedByPart");

  // Note: the "with hops" calculation used to be a separate method; both are
  // now combined into this single method. The optional machine parameter
  // selects between the two behaviors.
  bool bHops = (machine != Teuchos::null);

  env->debug(DETAILED_STATUS, "Entering globalWeightedByPart");

  // Initialize return values

  typedef typename Adapter::lno_t lno_t;
  typedef typename Adapter::gno_t gno_t;
  typedef typename Adapter::offset_t offset_t;
  typedef typename Adapter::scalar_t scalar_t;
  typedef typename Adapter::part_t part_t;

  // (the left half of this typedef was dropped from the listing; the graph
  // model's strided-data input type is assumed here)
  typedef StridedData<lno_t, scalar_t> t_input_t;

  lno_t localNumVertices = graph->getLocalNumVertices();
  lno_t localNumEdges = graph->getLocalNumEdges();

  ArrayView<const gno_t> Ids;
  ArrayView<t_input_t> v_wghts;
  graph->getVertexList(Ids, v_wghts);

  typedef GraphMetrics<scalar_t> gm_t;

  // get the edge ids and weights
  ArrayView<const gno_t> edgeIds;
  ArrayView<const offset_t> offsets;
  ArrayView<t_input_t> e_wgts;
  graph->getEdgeList(edgeIds, offsets, e_wgts);

  std::vector<scalar_t> edge_weights;
  int numWeightPerEdge = graph->getNumWeightsPerEdge();

  int numMetrics = bHops ?
    4 :  // "edge cuts", messages, hops, weighted hops
    2;   // "edge cuts", messages

  if (numWeightPerEdge) numMetrics += bHops ?
    numWeightPerEdge * 2 :  // "weight n", weighted hops per weight n
    numWeightPerEdge;       // "weight n"
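
  // Worked example of the count above: with bHops == true and
  // numWeightPerEdge == 2 this adds 4 + 2*2 = 8 metrics ("edge cuts",
  // "message", hops, weighted hops, plus a cut metric and a weighted-hop
  // metric per edge weight); with bHops == false the same input adds
  // 2 + 2 = 4.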

  // add some more metrics to the array
  auto next = metrics.size(); // where we begin filling
  for (auto n = 0; n < numMetrics; ++n) {
    addNewMetric<gm_t, scalar_t>(env, metrics);
  }

  std::vector<part_t> e_parts(localNumEdges);

  std::vector<part_t> local_parts;

#ifdef HAVE_ZOLTAN2_MPI
  if (comm->getSize() > 1) {
    const bool bUseLocalIDs = false;  // Local IDs not needed
    // (this typedef line was dropped from the listing; the simple-mode
    // Zoltan2 directory type is assumed here)
    typedef Zoltan2_Directory_Simple<gno_t,lno_t,part_t> directory_t;
    int debug_level = 0;
    directory_t directory(comm, bUseLocalIDs, debug_level);

    if (localNumVertices)
      directory.update(localNumVertices, &Ids[0], NULL, &parts[0],
                       NULL, directory_t::Update_Mode::Replace);
    else
      directory.update(localNumVertices, NULL, NULL, NULL,
                       NULL, directory_t::Update_Mode::Replace);

    if (localNumEdges)
      directory.find(localNumEdges, &edgeIds[0], NULL, &e_parts[0],
                     NULL, NULL, false);
    else
      directory.find(localNumEdges, NULL, NULL, NULL,
                     NULL, NULL, false);
  } else
#endif
  {
    std::map<gno_t,lno_t> global_id_to_local_index;

    // Everything is local. We need a global id to local index conversion,
    // which does not exist at this point, so create one.

    for (lno_t i = 0; i < localNumVertices; ++i){
      // at local index i we have the global index Ids[i],
      // so store i under key Ids[i] in the map.
      global_id_to_local_index[Ids[i]] = i;
    }

    for (lno_t i = 0; i < localNumEdges; ++i){
      gno_t ei = edgeIds[i];
      // ei is the global index of the neighbor vertex.
      part_t p = parts[global_id_to_local_index[ei]];
      e_parts[i] = p;
    }
  }
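
  // At this point e_parts[j] holds the part that owns the destination vertex
  // of local edge entry j, whether it was obtained through the distributed
  // directory or through the purely local map above.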

  RCP<const Teuchos::Comm<int> > tcomm = comm;

  env->timerStart(MACRO_TIMERS, "Communication Graph Create");
  {
    const bool bUseLocalIDs = false;  // Local IDs not needed
    int debug_level = 0;
    // Create a directory indexed by part_t with scalar_t values for weight sums

    // this struct is the user data type for a part:
    // each part will have a std::vector<part_info> as its user data,
    // representing the list of all neighbor parts and a weight for each.
    struct part_info {
      part_info() : sum_weights(0) {
      }

      // operator +=
      // this allows the directory to know how to assemble two structs
      // which return true for ==.
      // TODO: Decide if we want directory to work like this for AggregateAdd
      const part_info & operator+=(const part_info & src) {
        sum_weights += src.sum_weights;
        return *this;  // return the updated struct
      }

      // operator>
      // TODO: Decide if we want directory to work like this for AggregateAdd
      bool operator>(const part_info & src) const {
        // Note: Currently this doesn't actually do anything except allow this
        // struct to compile. Aggregate mode used operator> to preserve ordering
        // and therefore a custom struct must currently define it. However here
        // we use AggregateAdd mode, which doesn't actually use operator>.
        // If we changed the code so the input data were already ordered by
        // target_part, then we could change the directory implementation so
        // AggregateAdd and Aggregate are almost identical. The only difference
        // would be that Aggregate mode would check operator== and, if true,
        // throw away the duplicate, while AggregateAdd mode would check
        // operator== and, if true, call operator+= to combine the values,
        // in this case summing sum_weights.
        return (target_part > src.target_part);
      }

      // operator==
      // this allows the directory to know that two structs should be
      // aggregated into one using operator +=.
      // TODO: Decide if we want directory to work like this for AggregateAdd
      // This works but seems fussy/complicated - to discuss. I'm not yet sure
      // how to best integrate this so we can aggregate both simple types,
      // where we just keep unique elements, and more complex structs with a
      // 'rule' for combining them.
      bool operator==(const part_info & src) const {
        // if target_part is the same then the values for sum_weights will
        // be summed.
        return (target_part == src.target_part);
      }

      part_t target_part;    // the part this part_info refers to
      scalar_t sum_weights;  // the sum of weights
    };
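
    // Small illustration of the AggregateAdd behavior the operators above are
    // written for: if two entries {target_part = 3, sum_weights = 2.5} and
    // {target_part = 3, sum_weights = 1.0} are submitted, the directory sees
    // operator== return true and applies operator+=, leaving a single entry
    // {target_part = 3, sum_weights = 3.5} for that part.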

    // group the local vertices by their parts.
    std::vector<lno_t> part_begins(numParts, -1);
    std::vector<lno_t> part_nexts(localNumVertices, -1);

    // cluster vertices according to their parts:
    // build a linked list of vertices per part to form the local part graph.
    for (lno_t i = 0; i < localNumVertices; ++i){
      part_t ap = parts[i];
      part_nexts[i] = part_begins[ap];
      part_begins[ap] = i;
    }
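
    // Example of the layout built above: with parts = {0, 1, 0, 1} the loop
    // produces part_begins = {2, 3} and part_nexts = {-1, -1, 0, 1}, so
    // following part_begins[0] -> part_nexts[2] -> -1 visits vertices 2 and 0
    // of part 0; -1 terminates each list.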

    for (int weight_index = -1;
         weight_index < numWeightPerEdge; ++weight_index) {

      std::vector<part_t> part_data(numParts); // will resize to lower as needed
      std::vector<std::vector<part_info>> user_data(numParts); // also to resize
      int count_total_entries = 0;

      std::vector<part_t> part_neighbors(numParts);
      std::vector<scalar_t> part_neighbor_weights(numParts, 0);
      std::vector<scalar_t> part_neighbor_weights_ordered(numParts);

      // coarsen: visit the vertices of each part and accumulate, per part,
      // the edge weight going to each neighboring part.
      for (part_t i = 0; i < numParts; ++i) {
        part_t num_neighbor_parts = 0;
        lno_t v = part_begins[i];
        // get part i, and the first vertex v in this part.
        while (v != -1){
          // now get the neighbors of v.
          for (offset_t j = offsets[v]; j < offsets[v+1]; ++j){

            // get the part of the second vertex.
            part_t ep = e_parts[j];

            // TODO: Can we skip the condition (i==ep)?
            // The self reference set is going to be excluded later anyway,
            // so we could make this more efficient.
            scalar_t ew = 1;
            if (weight_index > -1){
              ew = e_wgts[weight_index][j];
            }
            // add it to my local part neighbors for part i
            // (a near-zero accumulated weight means this is the first edge
            // seen to part ep).
            if (part_neighbor_weights[ep] < 0.00001){
              part_neighbors[num_neighbor_parts++] = ep;
            }
            part_neighbor_weights[ep] += ew;
          }
          v = part_nexts[v];
        }

        // now get the part list.
        for (lno_t j = 0; j < num_neighbor_parts; ++j) {
          part_t neighbor_part = part_neighbors[j];
          part_neighbor_weights_ordered[j] =
            part_neighbor_weights[neighbor_part];
          part_neighbor_weights[neighbor_part] = 0;
        }

        if (num_neighbor_parts > 0) {
          // for the new directory, note a difference in the logic flow:
          // originally we had a CrsMatrix which could collect these values
          // as we built each row. For the directory it's probably better to
          // have update called just once, so we collect the values and then
          // do all of the update at the end.
          part_data[count_total_entries] = i; // set up for directory
          std::vector<part_info> & add_user_data =
            user_data[count_total_entries];
          ++count_total_entries;

          add_user_data.resize(num_neighbor_parts);

          for(int n = 0; n < num_neighbor_parts; ++n) {
            part_info & add_data = add_user_data[n];
            add_data.target_part = part_neighbors[n];
            add_data.sum_weights = part_neighbor_weights_ordered[n];
          }
        }
      }
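
      // At this point part_data[0..count_total_entries) lists the parts that
      // have at least one locally owned vertex with edges, and user_data[k]
      // holds, for part_data[k], one part_info per neighboring part (possibly
      // including the part itself, which is filtered out later) with the
      // locally accumulated edge weight to that neighbor.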

      scalar_t max_edge_cut = 0;
      scalar_t total_edge_cut = 0;
      part_t max_message = 0;
      part_t total_message = 0;

      part_t total_hop_count = 0;
      scalar_t total_weighted_hop_count = 0;
      part_t max_hop_count = 0;
      scalar_t max_weighted_hop_count = 0;

      // for serial or comm size 1 we need to fill this from local data
      // TODO: Maybe remove all special casing for serial and make this
      // pipeline uniform always
      if(local_parts.size() == 0) {
        local_parts.resize(numParts);
        for(size_t n = 0; n < local_parts.size(); ++n) {
          local_parts[n] = n;
        }
      }

      std::vector<std::vector<part_info>> find_user_data;

      // The directory does not yet support SerialComm because it still has
      // older MPI calls which need to be refactored to the Teuchos::Comm
      // format. To work around this issue, skip the directory calls and just
      // set the find data equal to the input update data. This works because
      // the logic above has already summed the weights per process, so in the
      // SerialComm case there won't be duplicates.
      bool bSerialComm =
        (dynamic_cast<const Teuchos::SerialComm<int>*>(&(*comm)) != NULL);

      if(!bSerialComm) {
        // (this typedef line was truncated in the listing; the vector-mode
        // Zoltan2 directory type is assumed here)
        typedef Zoltan2_Directory_Vector<part_t,int,std::vector<part_info>>
          directory_t;
        directory_t directory(comm, bUseLocalIDs, debug_level);

        if(count_total_entries) {
          // update
          directory.update(count_total_entries, &part_data[0],
                           NULL, &user_data[0],
                           NULL, directory_t::Update_Mode::AggregateAdd);
        }
        else {
          directory.update(count_total_entries, NULL, NULL, NULL,
                           NULL, directory_t::Update_Mode::AggregateAdd);
        }

        // get my local_parts (parts managed on this directory)
        directory.get_locally_managed_gids(local_parts);

        // set up find_user_data to have the right size
        find_user_data.resize(local_parts.size());

        // find
        directory.find(local_parts.size(), &local_parts[0], NULL,
                       &find_user_data[0], NULL, NULL, false);
      }
      else {
        find_user_data = user_data;
      }
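
      // For every part this rank is responsible for, the loop below sums the
      // weights of edges that leave the part (edge cut), counts how many
      // other parts it communicates with (messages), and, when a machine
      // model is available, accumulates plain and weight-scaled hop counts
      // between the two parts.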
      for(size_t n = 0; n < local_parts.size(); ++n) {
        scalar_t part_edge_cut = 0;
        part_t part_messages = 0;
        const std::vector<part_info> & data = find_user_data[n];
        for(size_t q = 0; q < data.size(); ++q) {
          const part_t local_part = local_parts[n];
          const part_info & info = data[q];
          if (info.target_part != local_part) {
            part_edge_cut += info.sum_weights;
            part_messages += 1;

            if(bHops) {
              typename MachineRep::machine_pcoord_t hop_count = 0;
              machine->getHopCount(local_part, info.target_part, hop_count);

              part_t hop_counts = hop_count;
              scalar_t weighted_hop_counts = hop_count * info.sum_weights;

              total_hop_count += hop_counts;
              total_weighted_hop_count += weighted_hop_counts;

              if (hop_counts > max_hop_count) {
                max_hop_count = hop_counts;
              }
              if (weighted_hop_counts > max_weighted_hop_count) {
                max_weighted_hop_count = weighted_hop_counts;
              }
            }
          }
        }

        if(part_edge_cut > max_edge_cut) {
          max_edge_cut = part_edge_cut;
        }
        total_edge_cut += part_edge_cut;

        if (part_messages > max_message){
          max_message = part_messages;
        }
        total_message += part_messages;
      }

      scalar_t g_max_edge_cut = 0;
      scalar_t g_total_edge_cut = 0;
      part_t g_max_message = 0;
      part_t g_total_message = 0;

      part_t g_total_hop_count = 0;
      scalar_t g_total_weighted_hop_count = 0;
      part_t g_max_hop_count = 0;
      scalar_t g_max_weighted_hop_count = 0;

      try{
        Teuchos::reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_MAX, 1,
                                          &max_edge_cut, &g_max_edge_cut);
        Teuchos::reduceAll<int, part_t>(*comm, Teuchos::REDUCE_MAX, 1,
                                        &max_message, &g_max_message);

        Teuchos::reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_SUM, 1,
                                          &total_edge_cut, &g_total_edge_cut);
        Teuchos::reduceAll<int, part_t>(*comm, Teuchos::REDUCE_SUM, 1,
                                        &total_message, &g_total_message);

        if(bHops) {
          Teuchos::reduceAll<int, part_t>(*comm, Teuchos::REDUCE_MAX, 1,
                                          &max_hop_count, &g_max_hop_count);
          Teuchos::reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_MAX, 1,
                                            &max_weighted_hop_count,
                                            &g_max_weighted_hop_count);

          Teuchos::reduceAll<int, part_t>(*comm, Teuchos::REDUCE_SUM, 1,
                                          &total_hop_count,
                                          &g_total_hop_count);
          Teuchos::reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_SUM, 1,
                                            &total_weighted_hop_count,
                                            &g_total_weighted_hop_count);
        }
      }
      Z2_THROW_OUTSIDE_ERROR(env);

      if (weight_index == -1){
        metrics[next]->setName("edge cuts");
      }
      else {
        std::ostringstream oss;
        oss << "weight " << weight_index;
        metrics[next]->setName(oss.str());
      }
      metrics[next]->setMetricValue("global maximum", g_max_edge_cut);
      metrics[next]->setMetricValue("global sum", g_total_edge_cut);
      next++;
      if (weight_index == -1){
        metrics[next]->setName("message");
        metrics[next]->setMetricValue("global maximum", g_max_message);
        metrics[next]->setMetricValue("global sum", g_total_message);
        next++;
      }

      if(bHops) {
        if (weight_index == -1){
          metrics[next]->setName("hops (No Weight)");
          metrics[next]->setMetricValue("global maximum", g_max_hop_count);
          metrics[next]->setMetricValue("global sum", g_total_hop_count);
          next++;
        }

        std::ostringstream oss;
        oss << "weighted hops" << weight_index;
        metrics[next]->setName(oss.str());
        metrics[next]->
          setMetricValue("global maximum", g_max_weighted_hop_count);
        metrics[next]->
          setMetricValue("global sum", g_total_weighted_hop_count);
        next++;
      }
    }
  }

  env->timerStop(MACRO_TIMERS, "globalWeightedByPart");

  env->debug(DETAILED_STATUS, "Exiting globalWeightedByPart");
}

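/*! \brief Print out header info for graph metrics.
 */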
template <typename scalar_t, typename part_t>
void printGraphMetricsHeader(std::ostream &os,
                             part_t targetNumParts,
                             part_t numParts) {

  os << "Graph Metrics: (" << numParts << " parts)";
  os << std::endl;
  if (targetNumParts != numParts) {
    os << "Target number of parts is: " << targetNumParts << std::endl;
  }
  // (restored; this call was dropped from the listing — it prints the
  // standard GraphMetrics column header)
  GraphMetrics<scalar_t>::printHeader(os);
}

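/*! \brief Print out a list of graph metrics.
 */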
template <typename scalar_t, typename part_t>
void printGraphMetrics(std::ostream &os,
                       part_t targetNumParts,
                       part_t numParts,
                       const ArrayView<RCP<BaseClassMetrics<scalar_t> > >
                         &infoList) {

  printGraphMetricsHeader<scalar_t, part_t>(os, targetNumParts, numParts);
  for (int i=0; i < infoList.size(); i++) {
    if (infoList[i]->getName() != METRICS_UNSET_STRING) {
      infoList[i]->printLine(os);
    }
  }
  os << std::endl;
}

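/*! \brief Print out a single graph metric.
 */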
template <typename scalar_t, typename part_t>
void printGraphMetrics(std::ostream &os,
                       part_t targetNumParts,
                       part_t numParts,
                       RCP<BaseClassMetrics<scalar_t>> metricValue) {

  printGraphMetricsHeader<scalar_t, part_t>(os, targetNumParts, numParts);
  metricValue->printLine(os);
}
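
// A minimal end-to-end sketch of how these utilities fit together, assuming
// the caller already has env, comm, graphModel and a part view from a
// partitioning solution; adapter_t, scalar_t and part_t stand for the user's
// adapter and its typedefs (all names here are illustrative, not defined in
// this file):
//
//   ArrayRCP<RCP<BaseClassMetrics<scalar_t> > > metrics;
//   ArrayRCP<scalar_t> globalSums;
//   part_t numParts = targetNumParts;
//   globalWeightedByPart<adapter_t>(env, comm, graphModel, partView,
//                                   numParts, metrics, globalSums);
//   printGraphMetrics<scalar_t, part_t>(std::cout, targetNumParts, numParts,
//                                       metrics.view(0, metrics.size()));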

} // namespace Zoltan2

#endif