Zoltan2
Loading...
Searching...
No Matches
Zoltan2_PartitioningSolution.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Zoltan2: A package of combinatorial algorithms for scientific computing
4//
5// Copyright 2012 NTESS and the Zoltan2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
14#ifndef _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
15#define _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
16
17namespace Zoltan2 {
18template <typename Adapter>
19class PartitioningSolution;
20}
21
23#include <Zoltan2_Solution.hpp>
24#include <Zoltan2_GreedyMWM.hpp>
25#include <Zoltan2_Algorithm.hpp>
27#include <cmath>
28#include <algorithm>
29#include <vector>
30#include <limits>
31#include <sstream>
32
33#ifdef _MSC_VER
34#define NOMINMAX
35#include <windows.h>
36#endif
37
38
39namespace Zoltan2 {
40
55template <typename Adapter>
57{
58public:
59
60#ifndef DOXYGEN_SHOULD_SKIP_THIS
61 typedef typename Adapter::gno_t gno_t;
62 typedef typename Adapter::scalar_t scalar_t;
63 typedef typename Adapter::lno_t lno_t;
64 typedef typename Adapter::part_t part_t;
65 typedef typename Adapter::user_t user_t;
66#endif
67
85 PartitioningSolution( const RCP<const Environment> &env,
86 const RCP<const Comm<int> > &comm,
87 int nUserWeights,
88 const RCP<Algorithm<Adapter> > &algorithm = Teuchos::null);
89
119 PartitioningSolution(const RCP<const Environment> &env,
120 const RCP<const Comm<int> > &comm,
121 int nUserWeights, ArrayView<ArrayRCP<part_t> > reqPartIds,
122 ArrayView<ArrayRCP<scalar_t> > reqPartSizes,
123 const RCP<Algorithm<Adapter> > &algorithm = Teuchos::null);
124
126 // Information that the algorithm may wish to query.
127
130 size_t getTargetGlobalNumberOfParts() const { return nGlobalParts_; }
131
134 size_t getActualGlobalNumberOfParts() const { return nGlobalPartsSolution_; }
135
138 size_t getLocalNumberOfParts() const { return nLocalParts_; }
139
147 scalar_t getLocalFractionOfPart() const { return localFraction_; }
148
158 bool oneToOnePartDistribution() const { return onePartPerProc_; }
159
179 const int *getPartDistribution() const {
180 if (partDist_.size() > 0) return &partDist_[0];
181 else return NULL;
182 }
183
201 if (procDist_.size() > 0) return &procDist_[0];
202 else return NULL;
203 }
204
208 int getNumberOfCriteria() const { return nWeightsPerObj_; }
209
210
217 bool criteriaHasUniformPartSizes(int idx) const { return pSizeUniform_[idx];}
218
231 scalar_t getCriteriaPartSize(int idx, part_t part) const {
232 if (pSizeUniform_[idx])
233 return 1.0 / nGlobalParts_;
234 else if (pCompactIndex_[idx].size())
235 return pSize_[idx][pCompactIndex_[idx][part]];
236 else
237 return pSize_[idx][part];
238 }
239
253 bool criteriaHaveSamePartSizes(int c1, int c2) const;
254
256 // Method used by the algorithm to set results.
257
284 void setParts(ArrayRCP<part_t> &partList);
285
287
299 void RemapParts();
300
302 /* Return the weight of objects staying with a given remap.
303 * If remap is NULL, compute weight of objects staying with given partition
304 */
305 long measure_stays(part_t *remap, int *idx, part_t *adj, long *wgt,
306 part_t nrhs, part_t nlhs)
307 {
308 long staying = 0;
309 for (part_t i = 0; i < nrhs; i++) {
310 part_t k = (remap ? remap[i] : i);
311 for (part_t j = idx[k]; j < idx[k+1]; j++) {
312 if (i == (adj[j]-nlhs)) {
313 staying += wgt[j];
314 break;
315 }
316 }
317 }
318 return staying;
319 }
320
322 // Results that may be queried by the user, by migration methods,
323 // or by metric calculation methods.
324 // We return raw pointers so users don't have to learn about our
325 // pointer wrappers.
326
329 inline const RCP<const Comm<int> > &getCommunicator() const { return comm_;}
330
333 inline const RCP<const Environment> &getEnvironment() const { return env_;}
334
337 const part_t *getPartListView() const {
338 if (parts_.size() > 0) return parts_.getRawPtr();
339 else return NULL;
340 }
341
346 const int *getProcListView() const {
347 if (procs_.size() > 0) return procs_.getRawPtr();
348 else return NULL;
349 }
350
353 virtual bool isPartitioningTreeBinary() const
354 {
355 if (this->algorithm_ == Teuchos::null)
356 throw std::logic_error("no partitioning algorithm has been run yet");
357 return this->algorithm_->isPartitioningTreeBinary();
358 }
359
362 void getPartitionTree(part_t & numTreeVerts,
363 std::vector<part_t> & permPartNums,
364 std::vector<part_t> & splitRangeBeg,
365 std::vector<part_t> & splitRangeEnd,
366 std::vector<part_t> & treeVertParents) const {
367
368 part_t numParts = static_cast<part_t>(getTargetGlobalNumberOfParts());
369
370 if (this->algorithm_ == Teuchos::null)
371 throw std::logic_error("no partitioning algorithm has been run yet");
372 this->algorithm_->getPartitionTree(
373 numParts, // may want to change how this is passed through
374 numTreeVerts,
375 permPartNums,
376 splitRangeBeg,
377 splitRangeEnd,
378 treeVertParents);
379 }
380
383 std::vector<Zoltan2::coordinateModelPartBox> &
385 {
386 return this->algorithm_->getPartBoxesView();
387 }
388
390 // when a point lies on a part boundary, the lowest part
391 // number on that boundary is returned.
392 // Note that not all partitioning algorithms will support
393 // this method.
394 //
395 // \param dim : the number of dimensions specified for the point in space
396 // \param point : the coordinates of the point in space; array of size dim
397 // \return the part number of a part overlapping the given point
398 part_t pointAssign(int dim, scalar_t *point) const
399 {
400 part_t p;
401 try {
402 if (this->algorithm_ == Teuchos::null)
403 throw std::logic_error("no partitioning algorithm has been run yet");
404
405 p = this->algorithm_->pointAssign(dim, point);
406 }
408 return p;
409 }
410
412 // This method allocates memory for the return argument, but does not
413 // control that memory. The user is responsible for freeing the
414 // memory.
415 //
416 // \param dim : (in) the number of dimensions specified for the box
417 // \param lower : (in) the coordinates of the lower corner of the box;
418 // array of size dim
419 // \param upper : (in) the coordinates of the upper corner of the box;
420 // array of size dim
421 // \param nPartsFound : (out) the number of parts overlapping the box
422 // \param partsFound : (out) array of parts overlapping the box
423 void boxAssign(int dim, scalar_t *lower, scalar_t *upper,
424 size_t &nPartsFound, part_t **partsFound) const
425 {
426 try {
427 if (this->algorithm_ == Teuchos::null)
428 throw std::logic_error("no partitioning algorithm has been run yet");
429
430 this->algorithm_->boxAssign(dim, lower, upper, nPartsFound, partsFound);
431 }
433 }
434
435
438 void getCommunicationGraph(ArrayRCP <part_t> &comXAdj,
439 ArrayRCP <part_t> &comAdj) const
440 {
441 try {
442 if (this->algorithm_ == Teuchos::null)
443 throw std::logic_error("no partitioning algorithm has been run yet");
444
445 this->algorithm_->getCommunicationGraph(this, comXAdj, comAdj);
446 }
448 }
449
467 void getPartsForProc(int procId, double &numParts, part_t &partMin,
468 part_t &partMax) const
469 {
470 env_->localInputAssertion(__FILE__, __LINE__, "invalid process id",
471 procId >= 0 && procId < comm_->getSize(), BASIC_ASSERTION);
472
473 procToPartsMap(procId, numParts, partMin, partMax);
474 }
475
487 void getProcsForPart(part_t partId, part_t &procMin, part_t &procMax) const
488 {
489 env_->localInputAssertion(__FILE__, __LINE__, "invalid part id",
490 partId >= 0 && partId < nGlobalParts_, BASIC_ASSERTION);
491
492 partToProcsMap(partId, procMin, procMax);
493 }
494
495private:
496 void partToProc(bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
497 int numLocalParts, int numGlobalParts);
498
499 void procToPartsMap(int procId, double &numParts, part_t &partMin,
500 part_t &partMax) const;
501
502 void partToProcsMap(part_t partId, int &procMin, int &procMax) const;
503
504 void setPartDistribution();
505
506 void setPartSizes(ArrayView<ArrayRCP<part_t> > reqPartIds,
507 ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
508
509 void computePartSizes(int widx, ArrayView<part_t> ids,
510 ArrayView<scalar_t> sizes);
511
512 void broadcastPartSizes(int widx);
513
514
515 RCP<const Environment> env_; // has application communicator
516 const RCP<const Comm<int> > comm_; // the problem communicator
517
518 //part box boundaries as a result of geometric partitioning algorithm.
519 RCP<std::vector<Zoltan2::coordinateModelPartBox> > partBoxes;
520
521 part_t nGlobalParts_;// target global number of parts
522 part_t nLocalParts_; // number of parts to be on this process
523
524 scalar_t localFraction_; // approx fraction of a part on this process
525 int nWeightsPerObj_; // if user has no weights, this is 1 TODO: WHY???
526
527 // If process p is to be assigned part p for all p, then onePartPerProc_
528 // is true. Otherwise it is false, and either procDist_ or partDist_
529 // describes the allocation of parts to processes.
530 //
531 // If parts are never split across processes, then procDist_ is defined
532 // as follows:
533 //
534 // partId = procDist_[procId]
535 // partIdNext = procDist_[procId+1]
536 // globalNumberOfParts = procDist_[numProcs]
537 //
538 // meaning that the parts assigned to process procId range from
539 // [partId, partIdNext). If partIdNext is the same as partId, then
540 // process procId has no parts.
541 //
542 // If the number parts is less than the number of processes, and the
543 // user did not specify "num_local_parts" for each of the processes, then
544 // parts are split across processes, and partDist_ is defined rather than
545 // procDist_.
546 //
547 // procId = partDist_[partId]
548 // procIdNext = partDist_[partId+1]
549 // globalNumberOfProcs = partDist_[numParts]
550 //
551 // which implies that the part partId is shared by processes in the
552 // the range [procId, procIdNext).
553 //
554 // We use std::vector so we can use upper_bound algorithm
555
556 bool onePartPerProc_; // either this is true...
557 std::vector<int> partDist_; // or this is defined ...
558 std::vector<part_t> procDist_; // or this is defined.
559 bool procDistEquallySpread_; // if procDist_ is used and
560 // #parts > #procs and
561 // num_local_parts is not specified,
562 // parts are evenly distributed to procs
563
564 // In order to minimize the storage required for part sizes, we
565 // have three different representations.
566 //
567 // If the part sizes for weight index w are all the same, then:
568 // pSizeUniform_[w] = true
569 // pCompactIndex_[w].size() = 0
570 // pSize_[w].size() = 0
571 //
572 // and the size for part p is 1.0 / nparts.
573 //
574 // If part sizes differ for each part in weight index w, but there
575 // are no more than 64 distinct sizes:
576 // pSizeUniform_[w] = false
577 // pCompactIndex_[w].size() = number of parts
578 // pSize_[w].size() = number of different sizes
579 //
580 // and the size for part p is pSize_[pCompactIndex_[p]].
581 //
582 // If part sizes differ for each part in weight index w, and there
583 // are more than 64 distinct sizes:
584 // pSizeUniform_[w] = false
585 // pCompactIndex_[w].size() = 0
586 // pSize_[w].size() = nparts
587 //
588 // and the size for part p is pSize_[p].
589 //
590 // NOTE: If we expect to have similar cases, i.e. a long list of scalars
591 // where it is highly possible that just a few unique values appear,
592 // then we may want to make this a class. The advantage is that we
593 // save a long list of 1-byte indices instead of a long list of scalars.
594
595 ArrayRCP<bool> pSizeUniform_;
596 ArrayRCP<ArrayRCP<unsigned char> > pCompactIndex_;
597 ArrayRCP<ArrayRCP<scalar_t> > pSize_;
598
600 // The algorithm sets these values upon completion.
601
602 ArrayRCP<part_t> parts_; // part number assigned to localid[i]
603
604 bool haveSolution_;
605
606 part_t nGlobalPartsSolution_; // global number of parts in solution
607
609 // The solution calculates this from the part assignments,
610 // unless onePartPerProc_.
611
612 ArrayRCP<int> procs_; // process rank assigned to localid[i]
613
615 // Algorithm used to compute the solution;
616 // needed for post-processing with pointAssign or getCommunicationGraph
617 const RCP<Algorithm<Adapter> > algorithm_; //
618};
619
621// Definitions
623
624template <typename Adapter>
626 const RCP<const Environment> &env,
627 const RCP<const Comm<int> > &comm,
628 int nUserWeights,
629 const RCP<Algorithm<Adapter> > &algorithm)
630 : env_(env), comm_(comm),
631 partBoxes(),
632 nGlobalParts_(0), nLocalParts_(0),
633 localFraction_(0), nWeightsPerObj_(),
634 onePartPerProc_(false), partDist_(), procDist_(),
635 procDistEquallySpread_(false),
636 pSizeUniform_(), pCompactIndex_(), pSize_(),
637 parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
638 procs_(), algorithm_(algorithm)
639{
640 nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1); // TODO: WHY?? WHY NOT ZERO?
641
642 setPartDistribution();
643
644 // We must call setPartSizes() because part sizes may have
645 // been provided by the user on other processes.
646
647 ArrayRCP<part_t> *noIds = new ArrayRCP<part_t> [nWeightsPerObj_];
648 ArrayRCP<scalar_t> *noSizes = new ArrayRCP<scalar_t> [nWeightsPerObj_];
649 ArrayRCP<ArrayRCP<part_t> > ids(noIds, 0, nWeightsPerObj_, true);
650 ArrayRCP<ArrayRCP<scalar_t> > sizes(noSizes, 0, nWeightsPerObj_, true);
651
652 setPartSizes(ids.view(0, nWeightsPerObj_), sizes.view(0, nWeightsPerObj_));
653
654 env_->memory("After construction of solution");
655}
656
657template <typename Adapter>
659 const RCP<const Environment> &env,
660 const RCP<const Comm<int> > &comm,
661 int nUserWeights,
662 ArrayView<ArrayRCP<part_t> > reqPartIds,
663 ArrayView<ArrayRCP<scalar_t> > reqPartSizes,
664 const RCP<Algorithm<Adapter> > &algorithm)
665 : env_(env), comm_(comm),
666 partBoxes(),
667 nGlobalParts_(0), nLocalParts_(0),
668 localFraction_(0), nWeightsPerObj_(),
669 onePartPerProc_(false), partDist_(), procDist_(),
670 procDistEquallySpread_(false),
671 pSizeUniform_(), pCompactIndex_(), pSize_(),
672 parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
673 procs_(), algorithm_(algorithm)
674{
675 nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1); // TODO: WHY?? WHY NOT ZERO?
676
677 setPartDistribution();
678
679 setPartSizes(reqPartIds, reqPartSizes);
680
681 env_->memory("After construction of solution");
682}
683
684template <typename Adapter>
686{
687 // Did the caller define num_global_parts and/or num_local_parts?
688
689 const ParameterList &pl = env_->getParameters();
690 size_t haveGlobalNumParts=0, haveLocalNumParts=0;
691 int numLocal=0, numGlobal=0;
692
693 const Teuchos::ParameterEntry *pe = pl.getEntryPtr("num_global_parts");
694
695 if (pe){
696 haveGlobalNumParts = 1;
697 nGlobalParts_ = part_t(pe->getValue(&nGlobalParts_));
698 numGlobal = nGlobalParts_;
699 }
700
701 pe = pl.getEntryPtr("num_local_parts");
702
703 if (pe){
704 haveLocalNumParts = 1;
705 nLocalParts_ = part_t(pe->getValue(&nLocalParts_));
706 numLocal = nLocalParts_;
707 }
708
709 try{
710 // Sets onePartPerProc_, partDist_, and procDist_
711
712 partToProc(true, haveLocalNumParts, haveGlobalNumParts,
713 numLocal, numGlobal);
714 }
716
717 int nprocs = comm_->getSize();
718 int rank = comm_->getRank();
719
720 if (onePartPerProc_){
721 nGlobalParts_ = nprocs;
722 nLocalParts_ = 1;
723 }
724 else if (partDist_.size() > 0){ // more procs than parts
725 nGlobalParts_ = partDist_.size() - 1;
726 int pstart = partDist_[0];
727 for (part_t i=1; i <= nGlobalParts_; i++){
728 int pend = partDist_[i];
729 if (rank >= pstart && rank < pend){
730 int numOwners = pend - pstart;
731 nLocalParts_ = 1;
732 localFraction_ = 1.0 / numOwners;
733 break;
734 }
735 pstart = pend;
736 }
737 }
738 else if (procDist_.size() > 0){ // more parts than procs
739 nGlobalParts_ = procDist_[nprocs];
740 nLocalParts_ = procDist_[rank+1] - procDist_[rank];
741 }
742 else {
743 throw std::logic_error("partToProc error");
744 }
745
746}
747
748template <typename Adapter>
749 void PartitioningSolution<Adapter>::setPartSizes(
750 ArrayView<ArrayRCP<part_t> > ids, ArrayView<ArrayRCP<scalar_t> > sizes)
751{
752 int widx = nWeightsPerObj_;
753 bool fail=false;
754
755 size_t *countBuf = new size_t [widx*2];
756 ArrayRCP<size_t> counts(countBuf, 0, widx*2, true);
757
758 fail = ((ids.size() != widx) || (sizes.size() != widx));
759
760 for (int w=0; !fail && w < widx; w++){
761 counts[w] = ids[w].size();
762 if (ids[w].size() != sizes[w].size()) fail=true;
763 }
764
765 env_->globalBugAssertion(__FILE__, __LINE__, "bad argument arrays", fail==0,
766 COMPLEX_ASSERTION, comm_);
767
768 // Are all part sizes the same? This is the common case.
769
770 ArrayRCP<scalar_t> *emptySizes= new ArrayRCP<scalar_t> [widx];
771 pSize_ = arcp(emptySizes, 0, widx);
772
773 ArrayRCP<unsigned char> *emptyIndices= new ArrayRCP<unsigned char> [widx];
774 pCompactIndex_ = arcp(emptyIndices, 0, widx);
775
776 bool *info = new bool [widx];
777 pSizeUniform_ = arcp(info, 0, widx);
778 for (int w=0; w < widx; w++)
779 pSizeUniform_[w] = true;
780
781 if (nGlobalParts_ == 1){
782 return; // there's only one part in the whole problem
783 }
784
785 size_t *ptr1 = counts.getRawPtr();
786 size_t *ptr2 = counts.getRawPtr() + widx;
787
788 try{
789 reduceAll<int, size_t>(*comm_, Teuchos::REDUCE_MAX, widx, ptr1, ptr2);
790 }
792
793 bool zero = true;
794
795 for (int w=0; w < widx; w++)
796 if (counts[widx+w] > 0){
797 zero = false;
798 pSizeUniform_[w] = false;
799 }
800
801 if (zero) // Part sizes for all criteria are uniform.
802 return;
803
804 // Compute the part sizes for criteria for which part sizes were
805 // supplied. Normalize for each criteria so part sizes sum to one.
806
807 int nprocs = comm_->getSize();
808 int rank = comm_->getRank();
809
810 for (int w=0; w < widx; w++){
811 if (pSizeUniform_[w]) continue;
812
813 // Send all ids and sizes to one process.
814 // (There is no simple gather method in Teuchos.)
815
816 part_t length = ids[w].size();
817 part_t *allLength = new part_t [nprocs];
818 Teuchos::gatherAll<int, part_t>(*comm_, 1, &length,
819 nprocs, allLength);
820
821 if (rank == 0){
822 int total = 0;
823 for (int i=0; i < nprocs; i++)
824 total += allLength[i];
825
826 part_t *partNums = new part_t [total];
827 scalar_t *partSizes = new scalar_t [total];
828
829 ArrayView<part_t> idArray(partNums, total);
830 ArrayView<scalar_t> sizeArray(partSizes, total);
831
832 if (length > 0){
833 for (int i=0; i < length; i++){
834 *partNums++ = ids[w][i];
835 *partSizes++ = sizes[w][i];
836 }
837 }
838
839 for (int p=1; p < nprocs; p++){
840 if (allLength[p] > 0){
841 Teuchos::receive<int, part_t>(*comm_, p,
842 allLength[p], partNums);
843 Teuchos::receive<int, scalar_t>(*comm_, p,
844 allLength[p], partSizes);
845 partNums += allLength[p];
846 partSizes += allLength[p];
847 }
848 }
849
850 delete [] allLength;
851
852 try{
853 computePartSizes(w, idArray, sizeArray);
854 }
856
857 delete [] idArray.getRawPtr();
858 delete [] sizeArray.getRawPtr();
859 }
860 else{
861 delete [] allLength;
862 if (length > 0){
863 Teuchos::send<int, part_t>(*comm_, length, ids[w].getRawPtr(), 0);
864 Teuchos::send<int, scalar_t>(*comm_, length, sizes[w].getRawPtr(), 0);
865 }
866 }
867
868 broadcastPartSizes(w);
869 }
870}
871
872template <typename Adapter>
873 void PartitioningSolution<Adapter>::broadcastPartSizes(int widx)
874{
875 env_->localBugAssertion(__FILE__, __LINE__, "preallocations",
876 pSize_.size()>widx &&
877 pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
879
880 int rank = comm_->getRank();
881 int nprocs = comm_->getSize();
882 part_t nparts = nGlobalParts_;
883
884 if (nprocs < 2)
885 return;
886
887 char flag=0;
888
889 if (rank == 0){
890 if (pSizeUniform_[widx] == true)
891 flag = 1;
892 else if (pCompactIndex_[widx].size() > 0)
893 flag = 2;
894 else
895 flag = 3;
896 }
897
898 try{
899 Teuchos::broadcast<int, char>(*comm_, 0, 1, &flag);
900 }
902
903 if (flag == 1){
904 if (rank > 0)
905 pSizeUniform_[widx] = true;
906
907 return;
908 }
909
910 if (flag == 2){
911
912 // broadcast the indices into the size list
913
914 unsigned char *idxbuf = NULL;
915
916 if (rank > 0){
917 idxbuf = new unsigned char [nparts];
918 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, idxbuf);
919 }
920 else{
921 env_->localBugAssertion(__FILE__, __LINE__, "index list size",
922 pCompactIndex_[widx].size() == nparts, COMPLEX_ASSERTION);
923 idxbuf = pCompactIndex_[widx].getRawPtr();
924 }
925
926 try{
927 // broadcast of unsigned char is not supported
928 Teuchos::broadcast<int, char>(*comm_, 0, nparts,
929 reinterpret_cast<char *>(idxbuf));
930 }
932
933 if (rank > 0)
934 pCompactIndex_[widx] = arcp(idxbuf, 0, nparts, true);
935
936 // broadcast the list of different part sizes
937
938 unsigned char maxIdx=0;
939 for (part_t p=0; p < nparts; p++)
940 if (idxbuf[p] > maxIdx) maxIdx = idxbuf[p];
941
942 int numSizes = maxIdx + 1;
943
944 scalar_t *sizeList = NULL;
945
946 if (rank > 0){
947 sizeList = new scalar_t [numSizes];
948 env_->localMemoryAssertion(__FILE__, __LINE__, numSizes, sizeList);
949 }
950 else{
951 env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes",
952 numSizes == pSize_[widx].size(), COMPLEX_ASSERTION);
953
954 sizeList = pSize_[widx].getRawPtr();
955 }
956
957 try{
958 Teuchos::broadcast<int, scalar_t>(*comm_, 0, numSizes, sizeList);
959 }
961
962 if (rank > 0)
963 pSize_[widx] = arcp(sizeList, 0, numSizes, true);
964
965 return;
966 }
967
968 if (flag == 3){
969
970 // broadcast the size of each part
971
972 scalar_t *sizeList = NULL;
973
974 if (rank > 0){
975 sizeList = new scalar_t [nparts];
976 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, sizeList);
977 }
978 else{
979 env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes",
980 nparts == pSize_[widx].size(), COMPLEX_ASSERTION);
981
982 sizeList = pSize_[widx].getRawPtr();
983 }
984
985 try{
986 Teuchos::broadcast<int, scalar_t >(*comm_, 0, nparts, sizeList);
987 }
989
990 if (rank > 0)
991 pSize_[widx] = arcp(sizeList, 0, nparts);
992
993 return;
994 }
995}
996
997template <typename Adapter>
998 void PartitioningSolution<Adapter>::computePartSizes(int widx,
999 ArrayView<part_t> ids, ArrayView<scalar_t> sizes)
1000{
1001 int len = ids.size();
1002
1003 if (len == 0){
1004 pSizeUniform_[widx] = true;
1005 return;
1006 }
1007
1008 env_->localBugAssertion(__FILE__, __LINE__, "bad array sizes",
1009 len>0 && sizes.size()==len, COMPLEX_ASSERTION);
1010
1011 env_->localBugAssertion(__FILE__, __LINE__, "bad index",
1012 widx>=0 && widx<nWeightsPerObj_, COMPLEX_ASSERTION);
1013
1014 env_->localBugAssertion(__FILE__, __LINE__, "preallocations",
1015 pSize_.size()>widx &&
1016 pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
1018
1019 // Check ids and sizes and find min, max and average sizes.
1020 // If sizes are very close to uniform, call them uniform parts.
1021
1022 part_t nparts = nGlobalParts_;
1023 unsigned char *buf = new unsigned char [nparts];
1024 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, buf);
1025 memset(buf, 0, nparts);
1026 ArrayRCP<unsigned char> partIdx(buf, 0, nparts, true);
1027
1028 scalar_t epsilon = 10e-5 / nparts;
1029 scalar_t min=sizes[0], max=sizes[0], sum=0;
1030
1031 for (int i=0; i < len; i++){
1032 part_t id = ids[i];
1033 scalar_t size = sizes[i];
1034
1035 env_->localInputAssertion(__FILE__, __LINE__, "invalid part id",
1036 id>=0 && id<nparts, BASIC_ASSERTION);
1037
1038 env_->localInputAssertion(__FILE__, __LINE__, "invalid part size", size>=0,
1040
1041 // TODO: we could allow users to specify multiple sizes for the same
1042 // part if we add a parameter that says what we are to do with them:
1043 // add them or take the max.
1044
1045 env_->localInputAssertion(__FILE__, __LINE__,
1046 "multiple sizes provided for one part", partIdx[id]==0, BASIC_ASSERTION);
1047
1048 partIdx[id] = 1; // mark that we have a size for this part
1049
1050 if (size < min) min = size;
1051 if (size > max) max = size;
1052 sum += size;
1053 }
1054
1055 if (sum == 0){
1056
1057 // User has given us a list of parts of size 0, we'll set
1058 // the rest to them to equally sized parts.
1059
1060 scalar_t *allSizes = new scalar_t [2];
1061 env_->localMemoryAssertion(__FILE__, __LINE__, 2, allSizes);
1062
1063 ArrayRCP<scalar_t> sizeArray(allSizes, 0, 2, true);
1064
1065 int numNonZero = nparts - len;
1066
1067 allSizes[0] = 0.0;
1068 allSizes[1] = 1.0 / numNonZero;
1069
1070 for (part_t p=0; p < nparts; p++)
1071 buf[p] = 1; // index to default part size
1072
1073 for (int i=0; i < len; i++)
1074 buf[ids[i]] = 0; // index to part size zero
1075
1076 pSize_[widx] = sizeArray;
1077 pCompactIndex_[widx] = partIdx;
1078
1079 return;
1080 }
1081
1082 if (max - min <= epsilon){
1083 pSizeUniform_[widx] = true;
1084 return;
1085 }
1086
1087 // A size for parts that were not specified:
1088 scalar_t avg = sum / nparts;
1089
1090 // We are going to merge part sizes that are very close. This takes
1091 // computation time now, but can save considerably in the storage of
1092 // all part sizes on each process. For example, a common case may
1093 // be some parts are size 1 and all the rest are size 2.
1094
1095 scalar_t *tmp = new scalar_t [len];
1096 env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
1097 memcpy(tmp, sizes.getRawPtr(), sizeof(scalar_t) * len);
1098 ArrayRCP<scalar_t> partSizes(tmp, 0, len, true);
1099
1100 std::sort(partSizes.begin(), partSizes.end());
1101
1102 // create a list of sizes that are unique within epsilon
1103
1104 Array<scalar_t> nextUniqueSize;
1105 nextUniqueSize.push_back(partSizes[len-1]); // largest
1106 scalar_t curr = partSizes[len-1];
1107 int avgIndex = len;
1108 bool haveAvg = false;
1109 if (curr - avg <= epsilon)
1110 avgIndex = 0;
1111
1112 for (int i=len-2; i >= 0; i--){
1113 scalar_t val = partSizes[i];
1114 if (curr - val > epsilon){
1115 nextUniqueSize.push_back(val); // the highest in the group
1116 curr = val;
1117 if (avgIndex==len && val > avg && val - avg <= epsilon){
1118 // the average would be in this group
1119 avgIndex = nextUniqueSize.size() - 1;
1120 haveAvg = true;
1121 }
1122 }
1123 }
1124
1125 partSizes.clear();
1126
1127 size_t numSizes = nextUniqueSize.size();
1128 int sizeArrayLen = numSizes;
1129
1130 if (numSizes < 64){
1131 // We can store the size for every part in a compact way.
1132
1133 // Create a list of all sizes in increasing order
1134
1135 if (!haveAvg) sizeArrayLen++; // need to include average
1136
1137 scalar_t *allSizes = new scalar_t [sizeArrayLen];
1138 env_->localMemoryAssertion(__FILE__, __LINE__, sizeArrayLen, allSizes);
1139 ArrayRCP<scalar_t> sizeArray(allSizes, 0, sizeArrayLen, true);
1140
1141 int newAvgIndex = sizeArrayLen;
1142
1143 for (int i=numSizes-1, idx=0; i >= 0; i--){
1144
1145 if (newAvgIndex == sizeArrayLen){
1146
1147 if (haveAvg && i==avgIndex)
1148 newAvgIndex = idx;
1149
1150 else if (!haveAvg && avg < nextUniqueSize[i]){
1151 newAvgIndex = idx;
1152 allSizes[idx++] = avg;
1153 }
1154 }
1155
1156 allSizes[idx++] = nextUniqueSize[i];
1157 }
1158
1159 env_->localBugAssertion(__FILE__, __LINE__, "finding average in list",
1160 newAvgIndex < sizeArrayLen, COMPLEX_ASSERTION);
1161
1162 for (int i=0; i < nparts; i++){
1163 buf[i] = newAvgIndex; // index to default part size
1164 }
1165
1166 sum = (nparts - len) * allSizes[newAvgIndex];
1167
1168 for (int i=0; i < len; i++){
1169 int id = ids[i];
1170 scalar_t size = sizes[i];
1171 int index;
1172
1173 // Find the first size greater than or equal to this size.
1174
1175 if (size < avg && avg - size <= epsilon)
1176 index = newAvgIndex;
1177 else{
1178 typename ArrayRCP<scalar_t>::iterator found =
1179 std::lower_bound(sizeArray.begin(), sizeArray.end(), size);
1180
1181 env_->localBugAssertion(__FILE__, __LINE__, "size array",
1182 found != sizeArray.end(), COMPLEX_ASSERTION);
1183
1184 index = found - sizeArray.begin();
1185 }
1186
1187 buf[id] = index;
1188 sum += allSizes[index];
1189 }
1190
1191 for (int i=0; i < sizeArrayLen; i++){
1192 sizeArray[i] /= sum;
1193 }
1194
1195 pCompactIndex_[widx] = partIdx;
1196 pSize_[widx] = sizeArray;
1197 }
1198 else{
1199 // To have access to part sizes, we must store nparts scalar_ts on
1200 // every process. We expect this is a rare case.
1201
1202 tmp = new scalar_t [nparts];
1203 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, tmp);
1204
1205 sum += ((nparts - len) * avg);
1206
1207 for (int i=0; i < nparts; i++){
1208 tmp[i] = avg/sum;
1209 }
1210
1211 for (int i=0; i < len; i++){
1212 tmp[ids[i]] = sizes[i]/sum;
1213 }
1214
1215 pSize_[widx] = arcp(tmp, 0, nparts);
1216 }
1217}
1218
1219template <typename Adapter>
1220 void PartitioningSolution<Adapter>::setParts(ArrayRCP<part_t> &partList)
1221{
1222 env_->debug(DETAILED_STATUS, "Entering setParts");
1223
1224 size_t len = partList.size();
1225
1226 // Find the actual number of parts in the solution, which can
1227 // be more or less than the nGlobalParts_ target.
1228 // (We may want to compute the imbalance of a given solution with
1229 // respect to a desired solution. This solution may have more or
1230 // fewer parts that the desired solution.)
1231
1232 part_t lMax = -1;
1233 part_t lMin = (len > 0 ? std::numeric_limits<part_t>::max() : 0);
1234 part_t gMax, gMin;
1235
1236 for (size_t i = 0; i < len; i++) {
1237 if (partList[i] < lMin) lMin = partList[i];
1238 if (partList[i] > lMax) lMax = partList[i];
1239 }
1240 Teuchos::reduceAll<int, part_t>(*comm_, Teuchos::REDUCE_MIN, 1, &lMin, &gMin);
1241 Teuchos::reduceAll<int, part_t>(*comm_, Teuchos::REDUCE_MAX, 1, &lMax, &gMax);
1242
1243 nGlobalPartsSolution_ = gMax - gMin + 1;
1244 parts_ = partList;
1245
1246 // Now determine which process gets each object, if not one-to-one.
1247
1248 if (!onePartPerProc_){
1249
1250 int *procs = new int [len];
1251 env_->localMemoryAssertion(__FILE__, __LINE__, len, procs);
1252 procs_ = arcp<int>(procs, 0, len);
1253
1254 if (len > 0) {
1255 part_t *parts = partList.getRawPtr();
1256
1257 if (procDist_.size() > 0){ // parts are not split across procs
1258
1259 int procId;
1260 for (size_t i=0; i < len; i++){
1261 partToProcsMap(parts[i], procs[i], procId);
1262 }
1263 }
1264 else{ // harder - we need to split the parts across multiple procs
1265
1266 lno_t *partCounter = new lno_t [nGlobalPartsSolution_];
1267 env_->localMemoryAssertion(__FILE__, __LINE__, nGlobalPartsSolution_,
1268 partCounter);
1269
1270 int numProcs = comm_->getSize();
1271
1272 //MD NOTE: there was no initialization for partCounter.
1273 //I added the line below, correct me if I am wrong.
1274 memset(partCounter, 0, sizeof(lno_t) * nGlobalPartsSolution_);
1275
1276 for (typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++)
1277 partCounter[parts[i]]++;
1278
1279 lno_t *procCounter = new lno_t [numProcs];
1280 env_->localMemoryAssertion(__FILE__, __LINE__, numProcs, procCounter);
1281
1282 int proc1;
1283 int proc2 = partDist_[0];
1284
1285 for (part_t part=1; part < nGlobalParts_; part++){
1286 proc1 = proc2;
1287 proc2 = partDist_[part+1];
1288 int numprocs = proc2 - proc1;
1289
1290 double dNum = partCounter[part];
1291 double dProcs = numprocs;
1292
1293 //std::cout << "dNum:" << dNum << " dProcs:" << dProcs << std::endl;
1294 double each = floor(dNum/dProcs);
1295 double extra = fmod(dNum,dProcs);
1296
1297 for (int proc=proc1, i=0; proc<proc2; proc++, i++){
1298 if (i < extra)
1299 procCounter[proc] = lno_t(each) + 1;
1300 else
1301 procCounter[proc] = lno_t(each);
1302 }
1303 }
1304
1305 delete [] partCounter;
1306
1307 for (typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++){
1308 if (partList[i] >= nGlobalParts_){
1309 // Solution has more parts that targeted. These
1310 // objects just remain on this process.
1311 procs[i] = comm_->getRank();
1312 continue;
1313 }
1314 part_t partNum = parts[i];
1315 proc1 = partDist_[partNum];
1316 proc2 = partDist_[partNum + 1];
1317
1318 int proc;
1319 for (proc=proc1; proc < proc2; proc++){
1320 if (procCounter[proc] > 0){
1321 procs[i] = proc;
1322 procCounter[proc]--;
1323 break;
1324 }
1325 }
1326 env_->localBugAssertion(__FILE__, __LINE__, "part to proc",
1327 proc < proc2, COMPLEX_ASSERTION);
1328 }
1329
1330 delete [] procCounter;
1331 }
1332 }
1333 }
1334
1335 // Now that parts_ info is back on home process, remap the parts.
1336 // TODO: The parts will be inconsistent with the proc assignments after
1337 // TODO: remapping. This problem will go away after we separate process
1338 // TODO: mapping from setParts. But for MueLu's use case, the part
1339 // TODO: remapping is all that matters; they do not use the process mapping.
1340 bool doRemap = false;
1341 const Teuchos::ParameterEntry *pe =
1342 env_->getParameters().getEntryPtr("remap_parts");
1343 if (pe) doRemap = pe->getValue(&doRemap);
1344 if (doRemap) RemapParts();
1345
1346 haveSolution_ = true;
1347
1348 env_->memory("After Solution has processed algorithm's answer");
1349 env_->debug(DETAILED_STATUS, "Exiting setParts");
1350}
1351
1352
1353template <typename Adapter>
1355 double &numParts, part_t &partMin, part_t &partMax) const
1356{
1357 if (onePartPerProc_){
1358 numParts = 1.0;
1359 partMin = partMax = procId;
1360 }
1361 else if (procDist_.size() > 0){
1362 partMin = procDist_[procId];
1363 partMax = procDist_[procId+1] - 1;
1364 numParts = procDist_[procId+1] - partMin;
1365 }
1366 else{
1367 // find the first p such that partDist_[p] > procId
1368
1369 std::vector<int>::const_iterator entry;
1370 entry = std::upper_bound(partDist_.begin(), partDist_.end(), procId);
1371
1372 size_t partIdx = entry - partDist_.begin();
1373 int numProcs = partDist_[partIdx] - partDist_[partIdx-1];
1374 partMin = partMax = int(partIdx) - 1;
1375 numParts = 1.0 / numProcs;
1376 }
1377}
1378
1379template <typename Adapter>
1380 void PartitioningSolution<Adapter>::partToProcsMap(part_t partId,
1381 int &procMin, int &procMax) const
1382{
1383 if (partId >= nGlobalParts_){
1384 // setParts() may be given an initial solution which uses a
1385 // different number of parts than the desired solution. It is
1386 // still a solution. We keep it on this process.
1387 procMin = procMax = comm_->getRank();
1388 }
1389 else if (onePartPerProc_){
1390 procMin = procMax = int(partId);
1391 }
1392 else if (procDist_.size() > 0){
1393 if (procDistEquallySpread_) {
1394 // Avoid binary search.
1395 double fProcs = comm_->getSize();
1396 double fParts = nGlobalParts_;
1397 double each = fParts / fProcs;
1398 procMin = int(partId / each);
1399 while (procDist_[procMin] > partId) procMin--;
1400 while (procDist_[procMin+1] <= partId) procMin++;
1401 procMax = procMin;
1402 }
1403 else {
1404 // find the first p such that procDist_[p] > partId.
1405 // For now, do a binary search.
1406
1407 typename std::vector<part_t>::const_iterator entry;
1408 entry = std::upper_bound(procDist_.begin(), procDist_.end(), partId);
1409
1410 size_t procIdx = entry - procDist_.begin();
1411 procMin = procMax = int(procIdx) - 1;
1412 }
1413 }
1414 else{
1415 procMin = partDist_[partId];
1416 procMax = partDist_[partId+1] - 1;
1417 }
1418}
1419
1420template <typename Adapter>
1422 int c1, int c2) const
1423{
1424 if (c1 < 0 || c1 >= nWeightsPerObj_ || c2 < 0 || c2 >= nWeightsPerObj_ )
1425 throw std::logic_error("criteriaHaveSamePartSizes error");
1426
1427 bool theSame = false;
1428
1429 if (c1 == c2)
1430 theSame = true;
1431
1432 else if (pSizeUniform_[c1] == true && pSizeUniform_[c2] == true)
1433 theSame = true;
1434
1435 else if (pCompactIndex_[c1].size() == pCompactIndex_[c2].size()){
1436 theSame = true;
1437 bool useIndex = pCompactIndex_[c1].size() > 0;
1438 if (useIndex){
1439 for (part_t p=0; theSame && p < nGlobalParts_; p++)
1440 if (pSize_[c1][pCompactIndex_[c1][p]] !=
1441 pSize_[c2][pCompactIndex_[c2][p]])
1442 theSame = false;
1443 }
1444 else{
1445 for (part_t p=0; theSame && p < nGlobalParts_; p++)
1446 if (pSize_[c1][p] != pSize_[c2][p])
1447 theSame = false;
1448 }
1449 }
1450
1451 return theSame;
1452}
1453
1469template <typename Adapter>
1471 bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
1472 int numLocalParts, int numGlobalParts)
1473{
1474#ifdef _MSC_VER
1475 typedef SSIZE_T ssize_t;
1476#endif
1477 int nprocs = comm_->getSize();
1478 ssize_t reducevals[4];
1479 ssize_t sumHaveGlobal=0, sumHaveLocal=0;
1480 ssize_t sumGlobal=0, sumLocal=0;
1481 ssize_t maxGlobal=0, maxLocal=0;
1482 ssize_t vals[4] = {haveNumGlobalParts, haveNumLocalParts,
1483 numGlobalParts, numLocalParts};
1484
1485 partDist_.clear();
1486 procDist_.clear();
1487
1488 if (doCheck){
1489
1490 try{
1491 reduceAll<int, ssize_t>(*comm_, Teuchos::REDUCE_SUM, 4, vals, reducevals);
1492 }
1494
1495 sumHaveGlobal = reducevals[0];
1496 sumHaveLocal = reducevals[1];
1497 sumGlobal = reducevals[2];
1498 sumLocal = reducevals[3];
1499
1500 env_->localInputAssertion(__FILE__, __LINE__,
1501 "Either all procs specify num_global/local_parts or none do",
1502 (sumHaveGlobal == 0 || sumHaveGlobal == nprocs) &&
1503 (sumHaveLocal == 0 || sumHaveLocal == nprocs),
1505 }
1506 else{
1507 if (haveNumLocalParts)
1508 sumLocal = numLocalParts * nprocs;
1509 if (haveNumGlobalParts)
1510 sumGlobal = numGlobalParts * nprocs;
1511
1512 sumHaveGlobal = haveNumGlobalParts ? nprocs : 0;
1513 sumHaveLocal = haveNumLocalParts ? nprocs : 0;
1514
1515 maxLocal = numLocalParts;
1516 maxGlobal = numGlobalParts;
1517 }
1518
1519 if (!haveNumLocalParts && !haveNumGlobalParts){
1520 onePartPerProc_ = true; // default if user did not specify
1521 return;
1522 }
1523
1524 if (haveNumGlobalParts){
1525 if (doCheck){
1526 vals[0] = numGlobalParts;
1527 vals[1] = numLocalParts;
1528 try{
1529 reduceAll<int, ssize_t>(
1530 *comm_, Teuchos::REDUCE_MAX, 2, vals, reducevals);
1531 }
1533
1534 maxGlobal = reducevals[0];
1535 maxLocal = reducevals[1];
1536
1537 env_->localInputAssertion(__FILE__, __LINE__,
1538 "Value for num_global_parts is different on different processes.",
1539 maxGlobal * nprocs == sumGlobal, BASIC_ASSERTION);
1540 }
1541
1542 if (sumLocal){
1543 env_->localInputAssertion(__FILE__, __LINE__,
1544 "Sum of num_local_parts does not equal requested num_global_parts",
1545 sumLocal == numGlobalParts, BASIC_ASSERTION);
1546
1547 if (sumLocal == nprocs && maxLocal == 1){
1548 onePartPerProc_ = true; // user specified one part per proc
1549 return;
1550 }
1551 }
1552 else{
1553 if (maxGlobal == nprocs){
1554 onePartPerProc_ = true; // user specified num parts is num procs
1555 return;
1556 }
1557 }
1558 }
1559
1560 // If we are here, we do not have #parts == #procs.
1561
1562 if (sumHaveLocal == nprocs){
1563 //
1564 // We will go by the number of local parts specified.
1565 //
1566
1567 try{
1568 procDist_.resize(nprocs+1);
1569 }
1570 catch (std::exception &){
1571 throw(std::bad_alloc());
1572 }
1573
1574 part_t *procArray = &procDist_[0];
1575
1576 try{
1577 part_t tmp = part_t(numLocalParts);
1578 gatherAll<int, part_t>(*comm_, 1, &tmp, nprocs, procArray + 1);
1579 }
1581
1582 procArray[0] = 0;
1583
1584 for (int proc=0; proc < nprocs; proc++)
1585 procArray[proc+1] += procArray[proc];
1586 }
1587 else{
1588 //
1589 // We will allocate global number of parts to the processes.
1590 //
1591 double fParts = numGlobalParts;
1592 double fProcs = nprocs;
1593
1594 if (fParts < fProcs){
1595
1596 try{
1597 partDist_.resize(size_t(fParts+1));
1598 }
1599 catch (std::exception &){
1600 throw(std::bad_alloc());
1601 }
1602
1603 int *partArray = &partDist_[0];
1604
1605 double each = floor(fProcs / fParts);
1606 double extra = fmod(fProcs, fParts);
1607 partDist_[0] = 0;
1608
1609 for (part_t part=0; part < numGlobalParts; part++){
1610 int numOwners = int(each + ((part<extra) ? 1 : 0));
1611 partArray[part+1] = partArray[part] + numOwners;
1612 }
1613
1614 env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs",
1615 partDist_[numGlobalParts] == nprocs, COMPLEX_ASSERTION, comm_);
1616 }
1617 else if (fParts > fProcs){
1618
1619 // User did not specify local number of parts per proc;
1620 // Distribute the parts evenly among the procs.
1621
1622 procDistEquallySpread_ = true;
1623
1624 try{
1625 procDist_.resize(size_t(fProcs+1));
1626 }
1627 catch (std::exception &){
1628 throw(std::bad_alloc());
1629 }
1630
1631 part_t *procArray = &procDist_[0];
1632
1633 double each = floor(fParts / fProcs);
1634 double extra = fmod(fParts, fProcs);
1635 procArray[0] = 0;
1636
1637 for (int proc=0; proc < nprocs; proc++){
1638 part_t numParts = part_t(each + ((proc<extra) ? 1 : 0));
1639 procArray[proc+1] = procArray[proc] + numParts;
1640 }
1641
1642 env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs",
1643 procDist_[nprocs] == numGlobalParts, COMPLEX_ASSERTION, comm_);
1644 }
1645 else{
1646 env_->globalBugAssertion(__FILE__, __LINE__,
1647 "should never get here", 1, COMPLEX_ASSERTION, comm_);
1648 }
1649 }
1650}
1651
1653// Remap a new part assignment vector for maximum overlap with an input
1654// part assignment.
1655//
1656// Assumptions for this version:
1657// input part assignment == processor rank for every local object.
1658// assuming nGlobalParts_ <= num ranks
1659// TODO: Write a version that takes the input part number as input;
1660// this change requires input parts in adapters to be provided in
1661// the Adapter.
1662// TODO: For repartitioning, compare to old remapping results; see Zoltan1.
1663
1664template <typename Adapter>
1666{
1667 size_t len = parts_.size();
1668
1669 part_t me = comm_->getRank();
1670 int np = comm_->getSize();
1671
1672 if (np < nGlobalParts_) {
1673 if (me == 0) {
1674 std::ostringstream msg;
1675 msg << "Remapping not yet supported for "
1676 << "num_global_parts " << nGlobalParts_
1677 << " > num procs " << np << std::endl;
1678 env_->debug(DETAILED_STATUS, msg.str());
1679 }
1680 return;
1681 }
1682 // Build edges of a bipartite graph with np + nGlobalParts_ vertices,
1683 // and edges between a process vtx and any parts to which that process'
1684 // objects are assigned.
1685 // Weight edge[parts_[i]] by the number of objects that are going from
1686 // this rank to parts_[i].
1687 // We use a std::map, assuming the number of unique parts in the parts_ array
1688 // is small to keep the binary search efficient.
1689 // TODO We use the count of objects to move; should change to SIZE of objects
1690 // to move; need SIZE function in Adapter.
1691
1692 std::map<part_t, long> edges;
1693 long lstaying = 0; // Total num of local objects staying if we keep the
1694 // current mapping. TODO: change to SIZE of local objs
1695 long gstaying = 0; // Total num of objects staying in the current partition
1696
1697 for (size_t i = 0; i < len; i++) {
1698 edges[parts_[i]]++; // TODO Use obj size instead of count
1699 if (parts_[i] == me) lstaying++; // TODO Use obj size instead of count
1700 }
1701
1702 Teuchos::reduceAll<int, long>(*comm_, Teuchos::REDUCE_SUM, 1,
1703 &lstaying, &gstaying);
1704//TODO if (gstaying == Adapter::getGlobalNumObjs()) return; // Nothing to do
1705
1706 part_t *remap = NULL;
1707
1708 int nedges = edges.size();
1709
1710 // Gather the graph to rank 0.
1711 part_t tnVtx = np + nGlobalParts_; // total # vertices
1712 int *idx = NULL; // Pointer index into graph adjacencies
1713 int *sizes = NULL; // nedges per rank
1714 if (me == 0) {
1715 idx = new int[tnVtx+1];
1716 sizes = new int[np];
1717 sizes[0] = nedges;
1718 }
1719 if (np > 1)
1720 Teuchos::gather<int, int>(&nedges, 1, sizes, 1, 0, *comm_);
1721
1722 // prefix sum to build the idx array
1723 if (me == 0) {
1724 idx[0] = 0;
1725 for (int i = 0; i < np; i++)
1726 idx[i+1] = idx[i] + sizes[i];
1727 }
1728
1729 // prepare to send edges
1730 int cnt = 0;
1731 part_t *bufv = NULL;
1732 long *bufw = NULL;
1733 if (nedges) {
1734 bufv = new part_t[nedges];
1735 bufw = new long[nedges];
1736 // Create buffer with edges (me, part[i]) and weight edges[parts_[i]].
1737 for (typename std::map<part_t, long>::iterator it = edges.begin();
1738 it != edges.end(); it++) {
1739 bufv[cnt] = it->first; // target part
1740 bufw[cnt] = it->second; // weight
1741 cnt++;
1742 }
1743 }
1744
1745 // Prepare to receive edges on rank 0
1746 part_t *adj = NULL;
1747 long *wgt = NULL;
1748 if (me == 0) {
1749//SYM adj = new part_t[2*idx[np]]; // need 2x space to symmetrize later
1750//SYM wgt = new long[2*idx[np]]; // need 2x space to symmetrize later
1751 adj = new part_t[idx[np]];
1752 wgt = new long[idx[np]];
1753 }
1754
1755 Teuchos::gatherv<int, part_t>(bufv, cnt, adj, sizes, idx, 0, *comm_);
1756 Teuchos::gatherv<int, long>(bufw, cnt, wgt, sizes, idx, 0, *comm_);
1757 delete [] bufv;
1758 delete [] bufw;
1759
1760 // Now have constructed graph on rank 0.
1761 // Call the matching algorithm
1762
1763 int doRemap;
1764 if (me == 0) {
1765 // We have the "LHS" vertices of the bipartite graph; need to create
1766 // "RHS" vertices.
1767 for (int i = 0; i < idx[np]; i++) {
1768 adj[i] += np; // New RHS vertex number; offset by num LHS vertices
1769 }
1770
1771 // Build idx for RHS vertices
1772 for (part_t i = np; i < tnVtx; i++) {
1773 idx[i+1] = idx[i]; // No edges for RHS vertices
1774 }
1775
1776#ifdef KDDKDD_DEBUG
1777 std::cout << "IDX ";
1778 for (part_t i = 0; i <= tnVtx; i++) std::cout << idx[i] << " ";
1779 std::cout << std::endl;
1780
1781 std::cout << "ADJ ";
1782 for (part_t i = 0; i < idx[tnVtx]; i++) std::cout << adj[i] << " ";
1783 std::cout << std::endl;
1784
1785 std::cout << "WGT ";
1786 for (part_t i = 0; i < idx[tnVtx]; i++) std::cout << wgt[i] << " ";
1787 std::cout << std::endl;
1788#endif
1789
1790 // Perform matching on the graph
1791 part_t *match = new part_t[tnVtx];
1792 for (part_t i = 0; i < tnVtx; i++) match[i] = i;
1793 part_t nmatches =
1794 Zoltan2::GreedyMWM<part_t, long>(idx, adj, wgt, tnVtx, match);
1795
1796#ifdef KDDKDD_DEBUG
1797 std::cout << "After matching: " << nmatches << " found" << std::endl;
1798 for (part_t i = 0; i < tnVtx; i++)
1799 std::cout << "match[" << i << "] = " << match[i]
1800 << ((match[i] != i &&
1801 (i < np && match[i] != i+np))
1802 ? " *" : " ")
1803 << std::endl;
1804#endif
1805
1806 // See whether there were nontrivial changes in the matching.
1807 bool nontrivial = false;
1808 if (nmatches) {
1809 for (part_t i = 0; i < np; i++) {
1810 if ((match[i] != i) && (match[i] != (i+np))) {
1811 nontrivial = true;
1812 break;
1813 }
1814 }
1815 }
1816
1817 // Process the matches
1818 if (nontrivial) {
1819 remap = new part_t[nGlobalParts_];
1820 for (part_t i = 0; i < nGlobalParts_; i++) remap[i] = -1;
1821
1822 bool *used = new bool[np];
1823 for (part_t i = 0; i < np; i++) used[i] = false;
1824
1825 // First, process all matched parts
1826 for (part_t i = 0; i < nGlobalParts_; i++) {
1827 part_t tmp = i + np;
1828 if (match[tmp] != tmp) {
1829 remap[i] = match[tmp];
1830 used[match[tmp]] = true;
1831 }
1832 }
1833
1834 // Second, process unmatched parts; keep same part number if possible
1835 for (part_t i = 0; i < nGlobalParts_; i++) {
1836 if (remap[i] > -1) continue;
1837 if (!used[i]) {
1838 remap[i] = i;
1839 used[i] = true;
1840 }
1841 }
1842
1843 // Third, process unmatched parts; give them the next unused part
1844 for (part_t i = 0, uidx = 0; i < nGlobalParts_; i++) {
1845 if (remap[i] > -1) continue;
1846 while (used[uidx]) uidx++;
1847 remap[i] = uidx;
1848 used[uidx] = true;
1849 }
1850 delete [] used;
1851 }
1852 delete [] match;
1853
1854#ifdef KDDKDD_DEBUG
1855 std::cout << "Remap vector: ";
1856 for (part_t i = 0; i < nGlobalParts_; i++) std::cout << remap[i] << " ";
1857 std::cout << std::endl;
1858#endif
1859
1860 long newgstaying = measure_stays(remap, idx, adj, wgt,
1861 nGlobalParts_, np);
1862 doRemap = (newgstaying > gstaying);
1863 std::ostringstream msg;
1864 msg << "gstaying " << gstaying << " measure(input) "
1865 << measure_stays(NULL, idx, adj, wgt, nGlobalParts_, np)
1866 << " newgstaying " << newgstaying
1867 << " nontrivial " << nontrivial
1868 << " doRemap " << doRemap << std::endl;
1869 env_->debug(DETAILED_STATUS, msg.str());
1870 }
1871 delete [] idx;
1872 delete [] sizes;
1873 delete [] adj;
1874 delete [] wgt;
1875
1876 Teuchos::broadcast<int, int>(*comm_, 0, 1, &doRemap);
1877
1878 if (doRemap) {
1879 if (me != 0) remap = new part_t[nGlobalParts_];
1880 Teuchos::broadcast<int, part_t>(*comm_, 0, nGlobalParts_, remap);
1881 for (size_t i = 0; i < len; i++) {
1882 parts_[i] = remap[parts_[i]];
1883 }
1884 }
1885
1886 delete [] remap; // TODO May want to keep for repartitioning as in Zoltan
1887}
1888
1889
1890} // namespace Zoltan2
1891
1892#endif
Zoltan2::BasicUserTypes< zscalar_t, zlno_t, zgno_t > user_t
Definition Metric.cpp:39
Defines the Environment class.
#define Z2_FORWARD_EXCEPTIONS
Forward an exception back through call stack.
#define Z2_THROW_OUTSIDE_ERROR(env)
Throw an error returned from outside the Zoltan2 library.
Greedy Maximal Weight Matching.
Defines the Solution base class.
Algorithm defines the base class for all algorithms.
A PartitioningSolution is a solution to a partitioning problem.
virtual bool isPartitioningTreeBinary() const
calculate if partition tree is binary.
const int * getProcListView() const
Returns the process list corresponding to the global ID list.
int getNumberOfCriteria() const
Get the number of criteria (object weights)
scalar_t getLocalFractionOfPart() const
If parts are divided across processes, return the fraction of a part on this process.
void RemapParts()
Remap a new partition for maximum overlap with an input partition.
size_t getActualGlobalNumberOfParts() const
Returns the actual global number of parts provided in setParts().
size_t getLocalNumberOfParts() const
Returns the number of parts to be assigned to this process.
PartitioningSolution(const RCP< const Environment > &env, const RCP< const Comm< int > > &comm, int nUserWeights, const RCP< Algorithm< Adapter > > &algorithm=Teuchos::null)
Constructor when part sizes are not supplied.
void getPartitionTree(part_t &numTreeVerts, std::vector< part_t > &permPartNums, std::vector< part_t > &splitRangeBeg, std::vector< part_t > &splitRangeEnd, std::vector< part_t > &treeVertParents) const
get the partition tree - fill the relevant arrays
scalar_t getCriteriaPartSize(int idx, part_t part) const
Get the size for a given weight index and a given part.
void boxAssign(int dim, scalar_t *lower, scalar_t *upper, size_t &nPartsFound, part_t **partsFound) const
Return an array of all parts overlapping a given box in space.
const RCP< const Environment > & getEnvironment() const
Return the environment associated with the solution.
std::vector< Zoltan2::coordinateModelPartBox > & getPartBoxesView() const
returns the part box boundary list.
void setParts(ArrayRCP< part_t > &partList)
The algorithm uses setParts to set the solution.
const int * getPartDistribution() const
Return a distribution by part.
long measure_stays(part_t *remap, int *idx, part_t *adj, long *wgt, part_t nrhs, part_t nlhs)
bool oneToOnePartDistribution() const
Is the part-to-process distribution is one-to-one.
bool criteriaHaveSamePartSizes(int c1, int c2) const
Return true if the two weight indices have the same part size information.
const part_t * getPartListView() const
Returns the part list corresponding to the global ID list.
part_t pointAssign(int dim, scalar_t *point) const
Return the part overlapping a given point in space;.
const RCP< const Comm< int > > & getCommunicator() const
Return the communicator associated with the solution.
void getProcsForPart(part_t partId, part_t &procMin, part_t &procMax) const
Get the processes containing a part.
void getPartsForProc(int procId, double &numParts, part_t &partMin, part_t &partMax) const
Get the parts belonging to a process.
size_t getTargetGlobalNumberOfParts() const
Returns the global number of parts desired in the solution.
void getCommunicationGraph(ArrayRCP< part_t > &comXAdj, ArrayRCP< part_t > &comAdj) const
returns communication graph resulting from geometric partitioning.
const part_t * getProcDistribution() const
Return a distribution by process.
bool criteriaHasUniformPartSizes(int idx) const
Determine if balancing criteria has uniform part sizes. (User can specify differing part sizes....
Just a placeholder for now.
map_t::local_ordinal_type lno_t
map_t::global_ordinal_type gno_t
Created by mbenlioglu on Aug 31, 2020.
static const std::string fail
@ DETAILED_STATUS
sub-steps, each method's entry and exit
@ BASIC_ASSERTION
fast typical checks for valid arguments
@ COMPLEX_ASSERTION
more involved, like validate a graph
#define epsilon
Definition nd.cpp:47
SparseMatrixAdapter_t::part_t part_t