546 const Teuchos::ArrayView<const lno_t>& adjs,
547 const Teuchos::ArrayView<const offset_t>& offsets,
548 const Teuchos::RCP<femv_t>& femv,
549 const Teuchos::ArrayView<const gno_t>& gids,
550 const Teuchos::ArrayView<const int>& rand,
551 const Teuchos::ArrayView<const int>& owners,
552 RCP<const map_t> mapOwnedPlusGhosts,
553 const std::unordered_map<
lno_t, std::vector<int>>& procs_to_send){
554 if(verbose) std::cout<<comm->getRank()<<
": inside coloring algorithm\n";
557 double total_time = 0.0;
558 double interior_time = 0.0;
559 double comm_time = 0.0;
560 double comp_time = 0.0;
561 double recoloring_time = 0.0;
562 double conflict_detection = 0.0;
564 const int numStatisticRecordingRounds = 100;
568 std::vector<int> deg_send_cnts(comm->getSize(),0);
569 std::vector<gno_t> deg_sdispls(comm->getSize()+1,0);
570 for(
int i = 0; i < owners.size(); i++){
571 deg_send_cnts[owners[i]]++;
574 gno_t deg_sendsize = 0;
575 std::vector<int> deg_sentcount(comm->getSize(),0);
576 for(
int i = 1; i < comm->getSize()+1; i++){
577 deg_sdispls[i] = deg_sdispls[i-1] + deg_send_cnts[i-1];
578 deg_sendsize += deg_send_cnts[i-1];
580 std::vector<gno_t> deg_sendbuf(deg_sendsize,0);
581 for(
int i = 0; i < owners.size(); i++){
582 size_t idx = deg_sdispls[owners[i]] + deg_sentcount[owners[i]];
583 deg_sentcount[owners[i]]++;
584 deg_sendbuf[idx] = mapOwnedPlusGhosts->getGlobalElement(i+nVtx);
586 Teuchos::ArrayView<int> deg_send_cnts_view = Teuchos::arrayViewFromVector(deg_send_cnts);
587 Teuchos::ArrayView<gno_t> deg_sendbuf_view = Teuchos::arrayViewFromVector(deg_sendbuf);
588 Teuchos::ArrayRCP<gno_t> deg_recvbuf;
589 std::vector<int> deg_recvcnts(comm->getSize(),0);
590 Teuchos::ArrayView<int> deg_recvcnts_view = Teuchos::arrayViewFromVector(deg_recvcnts);
591 AlltoAllv<gno_t>(*comm, *env, deg_sendbuf_view, deg_send_cnts_view, deg_recvbuf, deg_recvcnts_view);
594 for(
int i = 0; i < deg_recvbuf.size(); i++){
595 lno_t lid = mapOwnedPlusGhosts->getLocalElement(deg_recvbuf[i]);
596 deg_recvbuf[i] = offsets[lid+1] - offsets[lid];
599 ArrayRCP<gno_t> ghost_degrees;
600 AlltoAllv<gno_t>(*comm, *env, deg_recvbuf(), deg_recvcnts_view, ghost_degrees, deg_send_cnts_view);
602 Kokkos::View<gno_t*, device_type> ghost_degrees_dev(
"ghost degree view",ghost_degrees.size());
603 typename Kokkos::View<gno_t*, device_type>::host_mirror_type ghost_degrees_host = Kokkos::create_mirror(ghost_degrees_dev);
604 for(
int i = 0; i < ghost_degrees.size(); i++){
605 lno_t lid = mapOwnedPlusGhosts->getLocalElement(deg_sendbuf[i]);
606 ghost_degrees_host(lid-nVtx) = ghost_degrees[i];
608 Kokkos::deep_copy(ghost_degrees_dev, ghost_degrees_host);
612 for(
size_t i = 0; i < nVtx; i++){
613 offset_t curr_degree = offsets[i+1] - offsets[i];
614 if(curr_degree > local_max_degree){
615 local_max_degree = curr_degree;
618 Teuchos::reduceAll<int, offset_t>(*comm,Teuchos::REDUCE_MAX,1, &local_max_degree, &global_max_degree);
619 if(comm->getRank() == 0 && verbose) std::cout<<
"Input has max degree "<<global_max_degree<<
"\n";
620 if(verbose)std::cout<<comm->getRank()<<
": creating Kokkos Views\n";
622 Kokkos::View<offset_t*, device_type> dist_degrees(
"Owned+Ghost degree view",rand.size());
623 typename Kokkos::View<offset_t*, device_type>::host_mirror_type dist_degrees_host = Kokkos::create_mirror(dist_degrees);
625 for(
int i = 0; i < adjs.size(); i++){
626 if((
size_t)adjs[i] < nVtx)
continue;
627 dist_degrees_host(adjs[i])++;
630 for(
int i = 0; i < offsets.size()-1; i++){
631 dist_degrees_host(i) = offsets[i+1] - offsets[i];
634 Kokkos::View<offset_t*, device_type> dist_offsets(
"Owned+Ghost Offset view", rand.size()+1);
635 typename Kokkos::View<offset_t*, device_type>::host_mirror_type dist_offsets_host = Kokkos::create_mirror(dist_offsets);
638 dist_offsets_host(0) = 0;
639 uint64_t total_adjs = 0;
640 for(Teuchos_Ordinal i = 1; i < rand.size()+1; i++){
641 dist_offsets_host(i) = dist_degrees_host(i-1) + dist_offsets_host(i-1);
642 total_adjs+= dist_degrees_host(i-1);
645 Kokkos::View<lno_t*, device_type> dist_adjs(
"Owned+Ghost adjacency view", total_adjs);
646 typename Kokkos::View<lno_t*, device_type>::host_mirror_type dist_adjs_host = Kokkos::create_mirror(dist_adjs);
648 for(Teuchos_Ordinal i = 0; i < rand.size(); i++){
649 dist_degrees_host(i) = 0;
651 for(
int i = 0; i < adjs.size(); i++) dist_adjs_host(i) = adjs[i];
652 if(comm->getSize() > 1){
653 for(
size_t i = 0; i < nVtx; i++){
654 for(
offset_t j = offsets[i]; j < offsets[i+1]; j++){
656 if( (
size_t)adjs[j] >= nVtx){
658 dist_adjs_host(dist_offsets_host(adjs[j]) + dist_degrees_host(adjs[j])) = i;
659 dist_degrees_host(adjs[j])++;
665 if(verbose) std::cout<<comm->getRank()<<
": copying host mirrors to device views\n";
667 Kokkos::deep_copy(dist_degrees, dist_degrees_host);
668 Kokkos::deep_copy(dist_offsets, dist_offsets_host);
669 Kokkos::deep_copy(dist_adjs, dist_adjs_host);
670 if(verbose) std::cout<<comm->getRank()<<
": done copying to device\n";
673 Kokkos::View<gno_t*, device_type> recoloringSize(
"Recoloring Queue Size",1);
674 typename Kokkos::View<gno_t*, device_type>::host_mirror_type recoloringSize_host = Kokkos::create_mirror(recoloringSize);
675 recoloringSize_host(0) = 0;
676 Kokkos::deep_copy(recoloringSize, recoloringSize_host);
679 Kokkos::View<int*,device_type> rand_dev(
"randVec",rand.size());
680 typename Kokkos::View<int*, device_type>::host_mirror_type rand_host = Kokkos::create_mirror(rand_dev);
681 for(Teuchos_Ordinal i = 0; i < rand.size(); i++){
682 rand_host(i) = rand[i];
686 Kokkos::View<gno_t*, device_type> gid_dev(
"GIDs",gids.size());
687 typename Kokkos::View<gno_t*,device_type>::host_mirror_type gid_host = Kokkos::create_mirror(gid_dev);
688 for(Teuchos_Ordinal i = 0; i < gids.size(); i++){
689 gid_host(i) = gids[i];
693 Kokkos::deep_copy(rand_dev,rand_host);
694 Kokkos::deep_copy(gid_dev, gid_host);
696 if(verbose)std::cout<<comm->getRank()<<
": done creating recoloring datastructures\n";
699 for(
size_t i = 0; i < nVtx; i++){
700 for(
offset_t j = offsets[i]; j < offsets[i+1]; j++){
701 if((
size_t)adjs[j] >= nVtx) {
707 if(verbose)std::cout<<comm->getRank()<<
": creating send views\n";
710 Kokkos::View<lno_t*, device_type> verts_to_send_view(
"verts to send",boundary_size);
711 Kokkos::parallel_for(
"init verts_to_send_view",
712 Kokkos::RangePolicy<execution_space, int>(0,boundary_size),
713 KOKKOS_LAMBDA(
const int& i){
714 verts_to_send_view(i) = -1;
718 Kokkos::View<size_t*, device_type> verts_to_send_size(
"verts to send size",1);
719 Kokkos::View<size_t*, device_type, Kokkos::MemoryTraits<Kokkos::Atomic> > verts_to_send_size_atomic = verts_to_send_size;
720 typename Kokkos::View<lno_t*, device_type>::host_mirror_type verts_to_send_host = create_mirror(verts_to_send_view);
721 typename Kokkos::View<size_t*,device_type>::host_mirror_type verts_to_send_size_host = create_mirror(verts_to_send_size);
723 verts_to_send_size_host(0) = 0;
724 deep_copy(verts_to_send_size, verts_to_send_size_host);
726 if(verbose)std::cout<<comm->getRank()<<
": Done creating send views, initializing...\n";
727 if(verbose)std::cout<<comm->getRank()<<
": boundary_size = "<<boundary_size<<
" verts_to_send_size_atomic(0) = "<<verts_to_send_size_atomic(0)<<
"\n";
729 Kokkos::parallel_for(
"Initialize verts_to_send",
730 Kokkos::RangePolicy<execution_space, int>(0,nVtx),
731 KOKKOS_LAMBDA(
const int&i){
732 for(
offset_t j = dist_offsets(i); j < dist_offsets(i+1); j++){
733 if((
size_t)dist_adjs(j) >= nVtx){
734 verts_to_send_view(verts_to_send_size_atomic(0)++) = i;
743 Kokkos::View<int*, device_type> ghost_colors(
"ghost color backups", rand.size()-nVtx);
744 if(verbose)std::cout<<comm->getRank()<<
": Done initializing\n";
745 gno_t sentPerRound[numStatisticRecordingRounds];
746 gno_t recvPerRound[numStatisticRecordingRounds];
748 if(verbose) std::cout<<comm->getRank()<<
": Coloring interior\n";
751 if(timing) comm->barrier();
752 interior_time =
timer();
753 total_time =
timer();
755 bool use_vbbit = (global_max_degree < 6000);
756 this->colorInterior<execution_space,memory_space>
757 (nVtx, dist_adjs, dist_offsets, femv,dist_adjs,0,use_vbbit);
759 interior_time =
timer() - interior_time;
760 comp_time = interior_time;
762 if(verbose) std::cout<<comm->getRank()<<
": Going to recolor\n";
763 bool recolor_degrees = this->pl->template get<bool>(
"recolor_degrees",
true);
766 if(comm->getSize() > 1){
768 if(verbose)std::cout<<comm->getRank()<<
": going to communicate\n";
771 Kokkos::deep_copy(verts_to_send_host, verts_to_send_view);
772 Kokkos::deep_copy(verts_to_send_size_host, verts_to_send_size);
774 comm_time = doOwnedToGhosts(mapOwnedPlusGhosts,
777 verts_to_send_size_host,
782 sentPerRound[0] = sent;
783 recvPerRound[0] = recv;
784 if(verbose) std::cout<<comm->getRank()<<
": done communicating\n";
785 verts_to_send_size_host(0) = 0;
786 deep_copy(verts_to_send_size, verts_to_send_size_host);
789 Kokkos::View<int**, Kokkos::LayoutLeft, device_type> femvColors =
790 femv->template getLocalView<device_type>(Tpetra::Access::ReadWrite);
791 Kokkos::View<int*, device_type> femv_colors = subview(femvColors, Kokkos::ALL, 0);
792 Kokkos::parallel_for(
"get colors from femv",
793 Kokkos::RangePolicy<execution_space, int>(0,rand.size()-nVtx),
794 KOKKOS_LAMBDA(
const int& i){
795 ghost_colors(i) = femv_colors(i+nVtx);
799 double temp =
timer();
800 detectConflicts<execution_space, memory_space>(nVtx,
806 verts_to_send_size_atomic,
812 deep_copy(recoloringSize_host, recoloringSize);
813 conflict_detection +=
timer() - temp;
814 comp_time += conflict_detection;
817 if(verbose)std::cout<<comm->getRank()<<
": done initial recoloring, begin recoloring loop\n";
818 double totalPerRound[numStatisticRecordingRounds];
819 double commPerRound[numStatisticRecordingRounds];
820 double compPerRound[numStatisticRecordingRounds];
821 double recoloringPerRound[numStatisticRecordingRounds];
822 double conflictDetectionPerRound[numStatisticRecordingRounds];
823 double serialRecoloringPerRound[numStatisticRecordingRounds];
824 int vertsPerRound[numStatisticRecordingRounds];
826 if(comm->getSize() == 1) done =
true;
827 totalPerRound[0] = interior_time + comm_time + conflict_detection;
828 recoloringPerRound[0] = 0;
829 commPerRound[0] = comm_time;
830 compPerRound[0] = interior_time + conflict_detection;
831 conflictDetectionPerRound[0] = conflict_detection;
832 recoloringPerRound[0] = 0;
833 vertsPerRound[0] = 0;
834 int distributedRounds = 1;
835 int serial_threshold = this->pl->template get<int>(
"serial_threshold",0);
837 Kokkos::View<lno_t*, device_type> verts_to_recolor(
"verts_to_recolor", boundary_size);
838 typename Kokkos::View<int*, device_type>::host_mirror_type ghost_colors_host;
840 while(recoloringSize_host(0) > 0 || !done){
841 if(recoloringSize_host(0) < serial_threshold)
break;
843 auto femvColors = femv->getLocalViewDevice(Tpetra::Access::ReadWrite);
844 auto femv_colors = subview(femvColors, Kokkos::ALL, 0);
846 if(distributedRounds < numStatisticRecordingRounds) {
847 vertsPerRound[distributedRounds] = recoloringSize_host(0);
853 Kokkos::deep_copy(verts_to_recolor, verts_to_send_view);
855 double recolor_temp =
timer();
857 deep_copy(verts_to_send_size_host, verts_to_send_size);
858 if(verts_to_send_size_host(0) > 0){
861 dist_adjs,dist_offsets,
864 verts_to_send_size_host(0),
868 if(distributedRounds < numStatisticRecordingRounds){
869 recoloringPerRound[distributedRounds] =
timer() - recolor_temp;
870 recoloring_time += recoloringPerRound[distributedRounds];
871 comp_time += recoloringPerRound[distributedRounds];
872 compPerRound[distributedRounds] = recoloringPerRound[distributedRounds];
873 totalPerRound[distributedRounds] = recoloringPerRound[distributedRounds];
875 double recolor_round_time =
timer() - recolor_temp;
876 recoloring_time += recolor_round_time;
877 comp_time += recolor_round_time;
882 recoloringSize_host(0) = 0;
883 Kokkos::deep_copy(recoloringSize,recoloringSize_host);
885 Kokkos::parallel_for(
"set femv colors",
886 Kokkos::RangePolicy<execution_space, int>(0,rand.size()-nVtx),
887 KOKKOS_LAMBDA(
const int& i){
888 femv_colors(i+nVtx) = ghost_colors(i);
892 Kokkos::deep_copy(verts_to_send_host, verts_to_send_view);
893 Kokkos::deep_copy(verts_to_send_size_host, verts_to_send_size);
896 femvColors =
decltype(femvColors)();
897 femv_colors =
decltype(femv_colors)();
899 double curr_comm_time = doOwnedToGhosts(mapOwnedPlusGhosts,
902 verts_to_send_size_host,
907 comm_time += curr_comm_time;
908 if(distributedRounds < numStatisticRecordingRounds){
909 commPerRound[distributedRounds] = curr_comm_time;
910 sentPerRound[distributedRounds] = sent;
911 recvPerRound[distributedRounds] = recv;
912 totalPerRound[distributedRounds] += commPerRound[distributedRounds];
918 femvColors = femv->getLocalViewDevice(Tpetra::Access::ReadWrite);
919 femv_colors = subview(femvColors, Kokkos::ALL, 0);
920 Kokkos::parallel_for(
"get femv colors 2",
921 Kokkos::RangePolicy<execution_space, int>(0,rand.size()-nVtx),
922 KOKKOS_LAMBDA(
const int& i){
923 ghost_colors(i) = femv_colors(i+nVtx);
926 verts_to_send_size_host(0) = 0;
927 deep_copy(verts_to_send_size, verts_to_send_size_host);
928 double detection_temp =
timer();
929 detectConflicts<execution_space, memory_space>(nVtx,
935 verts_to_send_size_atomic,
942 Kokkos::deep_copy(recoloringSize_host, recoloringSize);
944 if(distributedRounds < numStatisticRecordingRounds){
945 conflictDetectionPerRound[distributedRounds] =
timer() - detection_temp;
946 conflict_detection += conflictDetectionPerRound[distributedRounds];
947 compPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds];
948 totalPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds];
949 comp_time += conflictDetectionPerRound[distributedRounds];
951 double conflict_detection_round_time =
timer()- detection_temp;
952 conflict_detection += conflict_detection_round_time;
953 comp_time += conflict_detection_round_time;
957 int localDone = recoloringSize_host(0);
958 Teuchos::reduceAll<int, int>(*comm,Teuchos::REDUCE_SUM,1, &localDone, &globalDone);
965 if(recoloringSize_host(0) > 0 || !done){
966 ghost_colors_host = Kokkos::create_mirror_view(ghost_colors);
967 deep_copy(ghost_colors_host, ghost_colors);
968 deep_copy(verts_to_send_host, verts_to_send_view);
969 deep_copy(verts_to_send_size_host, verts_to_send_size);
974 while(recoloringSize_host(0) > 0 || !done){
976 auto femvColors = femv->getLocalViewHost(Tpetra::Access::ReadWrite);
977 auto femv_colors = subview(femvColors, Kokkos::ALL, 0);
980 if(distributedRounds < 100){
981 vertsPerRound[distributedRounds] = recoloringSize_host(0);
984 double recolor_temp =
timer();
986 if(verts_to_send_size_host(0) > 0){
989 (femv_colors.size(), dist_adjs_host, dist_offsets_host, femv, verts_to_send_host, verts_to_send_size_host(0),
true);
992 if(distributedRounds < numStatisticRecordingRounds){
993 recoloringPerRound[distributedRounds] =
timer() - recolor_temp;
994 recoloring_time += recoloringPerRound[distributedRounds];
995 comp_time += recoloringPerRound[distributedRounds];
996 compPerRound[distributedRounds] = recoloringPerRound[distributedRounds];
997 totalPerRound[distributedRounds] = recoloringPerRound[distributedRounds];
999 double recolor_serial_round_time =
timer() - recolor_temp;
1000 recoloring_time += recolor_serial_round_time;
1001 comp_time += recolor_serial_round_time;
1004 recoloringSize_host(0) = 0;
1006 for(
size_t i = 0; i < rand.size() -nVtx; i++){
1007 femv_colors(i+nVtx) = ghost_colors_host(i);
1011 double curr_comm_time = doOwnedToGhosts(mapOwnedPlusGhosts,
1014 verts_to_send_size_host,
1019 comm_time += curr_comm_time;
1021 if(distributedRounds < numStatisticRecordingRounds){
1022 commPerRound[distributedRounds] = curr_comm_time;
1023 sentPerRound[distributedRounds] = sent;
1024 recvPerRound[distributedRounds] = recv;
1025 totalPerRound[distributedRounds] += commPerRound[distributedRounds];
1027 for(
size_t i = 0; i < rand.size()-nVtx; i++){
1028 ghost_colors_host(i) = femv_colors(i+nVtx);
1031 verts_to_send_size_host(0) = 0;
1032 double detection_temp =
timer();
1033 detectConflicts<host_exec, host_mem>(nVtx,
1039 verts_to_send_size_host,
1040 recoloringSize_host,
1045 if(distributedRounds < numStatisticRecordingRounds){
1046 conflictDetectionPerRound[distributedRounds] =
timer() - detection_temp;
1047 conflict_detection += conflictDetectionPerRound[distributedRounds];
1048 compPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds];
1049 totalPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds];
1050 comp_time += conflictDetectionPerRound[distributedRounds];
1052 double conflict_detection_serial_round_time =
timer() - detection_temp;
1053 conflict_detection += conflict_detection_serial_round_time;
1054 comp_time += conflict_detection_serial_round_time;
1058 int localDone = recoloringSize_host(0);
1059 Teuchos::reduceAll<int, int>(*comm, Teuchos::REDUCE_SUM,1, &localDone, &globalDone);
1060 distributedRounds++;
1063 total_time =
timer() - total_time;
1067 std::cout<<comm->getRank()<<
": done recoloring loop, computing statistics\n";
1068 int localBoundaryVertices = 0;
1069 for(
size_t i = 0; i < nVtx; i++){
1070 for(
offset_t j = offsets[i]; j < offsets[i+1]; j++){
1071 if((
size_t)adjs[j] >= nVtx){
1072 localBoundaryVertices++;
1079 int totalBoundarySize = 0;
1080 int totalVertsPerRound[numStatisticRecordingRounds];
1081 double finalTotalPerRound[numStatisticRecordingRounds];
1082 double maxRecoloringPerRound[numStatisticRecordingRounds];
1083 double finalSerialRecoloringPerRound[numStatisticRecordingRounds];
1084 double minRecoloringPerRound[numStatisticRecordingRounds];
1085 double finalCommPerRound[numStatisticRecordingRounds];
1086 double finalCompPerRound[numStatisticRecordingRounds];
1087 double finalConflictDetectionPerRound[numStatisticRecordingRounds];
1088 gno_t finalRecvPerRound[numStatisticRecordingRounds];
1089 gno_t finalSentPerRound[numStatisticRecordingRounds];
1090 for(
int i = 0; i < numStatisticRecordingRounds; i++) {
1091 totalVertsPerRound[i] = 0;
1092 finalTotalPerRound[i] = 0.0;
1093 maxRecoloringPerRound[i] = 0.0;
1094 minRecoloringPerRound[i] = 0.0;
1095 finalCommPerRound[i] = 0.0;
1096 finalCompPerRound[i] = 0.0;
1097 finalConflictDetectionPerRound[i] = 0.0;
1098 finalSentPerRound[i] = 0;
1099 finalRecvPerRound[i] = 0;
1101 Teuchos::reduceAll<int,int>(*comm, Teuchos::REDUCE_SUM,1, &localBoundaryVertices,&totalBoundarySize);
1102 Teuchos::reduceAll<int,int>(*comm, Teuchos::REDUCE_SUM,numStatisticRecordingRounds,vertsPerRound,totalVertsPerRound);
1103 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,numStatisticRecordingRounds,totalPerRound,finalTotalPerRound);
1104 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,numStatisticRecordingRounds,recoloringPerRound,maxRecoloringPerRound);
1105 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MIN,numStatisticRecordingRounds,recoloringPerRound,minRecoloringPerRound);
1106 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,numStatisticRecordingRounds,serialRecoloringPerRound,finalSerialRecoloringPerRound);
1107 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,numStatisticRecordingRounds,commPerRound,finalCommPerRound);
1108 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,numStatisticRecordingRounds,compPerRound,finalCompPerRound);
1109 Teuchos::reduceAll<int,double>(*comm,
1110 Teuchos::REDUCE_MAX,numStatisticRecordingRounds,conflictDetectionPerRound,finalConflictDetectionPerRound);
1111 Teuchos::reduceAll<int,gno_t> (*comm, Teuchos::REDUCE_SUM,numStatisticRecordingRounds,recvPerRound, finalRecvPerRound);
1112 Teuchos::reduceAll<int,gno_t> (*comm, Teuchos::REDUCE_SUM,numStatisticRecordingRounds,sentPerRound, finalSentPerRound);
1114 std::cout <<
"Rank " << comm->getRank()
1115 <<
": boundary size: " << localBoundaryVertices << std::endl;
1116 if(comm->getRank()==0)
1117 std::cout <<
"Total boundary size: " << totalBoundarySize << std::endl;
1118 for(
int i = 0; i < std::min(distributedRounds,numStatisticRecordingRounds); i++){
1119 std::cout <<
"Rank " << comm->getRank()
1120 <<
": recolor " << vertsPerRound[i]
1121 <<
" vertices in round " << i << std::endl;
1122 if(comm->getRank()==0) {
1123 std::cout <<
"recolored " << totalVertsPerRound[i]
1124 <<
" vertices in round " << i << std::endl;
1125 std::cout <<
"total time in round " << i
1126 <<
": " << finalTotalPerRound[i] << std::endl;;
1127 std::cout <<
"recoloring time in round " << i
1128 <<
": " << maxRecoloringPerRound[i] << std::endl;
1129 std::cout <<
"serial recoloring time in round " << i
1130 <<
": " << finalSerialRecoloringPerRound[i] << std::endl;
1131 std::cout <<
"min recoloring time in round " << i
1132 <<
": " << minRecoloringPerRound[i] << std::endl;
1133 std::cout <<
"conflict detection time in round " << i
1134 <<
": " << finalConflictDetectionPerRound[i] << std::endl;
1135 std::cout <<
"comm time in round " << i
1136 <<
": " << finalCommPerRound[i] << std::endl;
1137 std::cout <<
"total sent in round " << i
1138 <<
": " << finalSentPerRound[i] << std::endl;
1139 std::cout <<
"total recv in round " << i
1140 <<
": " << finalRecvPerRound[i] << std::endl;
1141 std::cout <<
"comp time in round " << i
1142 <<
": " << finalCompPerRound[i] << std::endl;
1146 double global_total_time = 0.0;
1147 double global_recoloring_time=0.0;
1148 double global_min_recoloring_time=0.0;
1149 double global_conflict_detection=0.0;
1150 double global_comm_time=0.0;
1151 double global_comp_time=0.0;
1152 double global_interior_time = 0.0;
1153 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,1,&total_time,&global_total_time);
1154 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,1,&recoloring_time,&global_recoloring_time);
1155 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MIN,1,&recoloring_time,&global_min_recoloring_time);
1156 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,1,&conflict_detection,&global_conflict_detection);
1157 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,1,&comm_time,&global_comm_time);
1158 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,1,&comp_time,&global_comp_time);
1159 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,1,&interior_time,&global_interior_time);
1162 if(comm->getRank()==0){
1163 std::cout <<
"Total Time: " << global_total_time << std::endl;
1164 std::cout <<
"Interior Time: " << global_interior_time << std::endl;
1165 std::cout <<
"Recoloring Time: " << global_recoloring_time << std::endl;
1166 std::cout <<
"Min Recoloring Time: " << global_min_recoloring_time << std::endl;
1167 std::cout <<
"Conflict Detection Time: " << global_conflict_detection << std::endl;
1168 std::cout <<
"Comm Time: " << global_comm_time << std::endl;
1169 std::cout <<
"Comp Time: " << global_comp_time << std::endl;
1172 if(verbose) std::cout<<comm->getRank()<<
": exiting coloring\n";