87 const Teuchos::ParameterList& pL = GetParameterList();
90 const int startLevel = pL.get<
int>(
"repartition: start level");
91 const int nodeRepartLevel = pL.get<
int>(
"repartition: node repartition level");
92 LO minRowsPerProcess = pL.get<LO>(
"repartition: min rows per proc");
93 LO targetRowsPerProcess = pL.get<LO>(
"repartition: target rows per proc");
94 LO minRowsPerThread = pL.get<LO>(
"repartition: min rows per thread");
95 LO targetRowsPerThread = pL.get<LO>(
"repartition: target rows per thread");
96 LO putOnSingleProc = pL.get<LO>(
"repartition: put on single proc");
97 const double nonzeroImbalance = pL.get<
double>(
"repartition: max imbalance");
98 const bool useMap = pL.get<
bool>(
"repartition: use map");
100 int thread_per_mpi_rank = 1;
101#if defined(KOKKOS_ENABLE_OPENMP)
102 using execution_space =
typename Node::device_type::execution_space;
103 if (std::is_same<execution_space, Kokkos::OpenMP>::value)
104 thread_per_mpi_rank = execution_space().concurrency();
107 if (minRowsPerThread > 0)
109 minRowsPerProcess = minRowsPerThread * thread_per_mpi_rank;
111 if (targetRowsPerThread == 0)
112 targetRowsPerThread = minRowsPerThread;
114 if (targetRowsPerThread > 0)
116 targetRowsPerProcess = targetRowsPerThread * thread_per_mpi_rank;
118 if (targetRowsPerProcess == 0)
119 targetRowsPerProcess = minRowsPerProcess;
122 Set<LO>(currentLevel,
"repartition: heuristic target rows per process", targetRowsPerProcess);
125 TEUCHOS_TEST_FOR_EXCEPTION(nodeRepartLevel >= startLevel,
Exceptions::RuntimeError,
"MueLu::RepartitionHeuristicFactory::Build(): If 'repartition: node repartition level' is set, it must be less than or equal to 'repartition: start level'");
128 RCP<const FactoryBase> Afact;
131 Afact = GetFactory(
"A");
132 if (!Afact.is_null() && Teuchos::rcp_dynamic_cast<const RAPFactory>(Afact) == Teuchos::null &&
133 Teuchos::rcp_dynamic_cast<const BlockedRAPFactory>(Afact) == Teuchos::null &&
134 Teuchos::rcp_dynamic_cast<const SubBlockAFactory>(Afact) == Teuchos::null) {
135 GetOStream(
Warnings) <<
"MueLu::RepartitionHeuristicFactory::Build: The generation factory for A must "
136 "be a RAPFactory or a SubBlockAFactory providing the non-rebalanced matrix information! "
137 "It specifically must not be of type Rebalance(Blocked)AcFactory or similar. "
138 "Please check the input. Make also sure that \"number of partitions\" is provided to "
139 "the Interface class and the RepartitionFactory instance. Instead, we have a "
140 << Afact->description() << std::endl;
143 A = Get<RCP<Matrix> >(currentLevel,
"A");
144 map = A->getRowMap();
146 map = Get<RCP<const Map> >(currentLevel,
"Map");
158 if (currentLevel.
GetLevelID() == nodeRepartLevel && map->getComm()->getSize() > 1) {
159 RCP<const Teuchos::Comm<int> > NodeComm = Get<RCP<const Teuchos::Comm<int> > >(currentLevel,
"Node Comm");
160 TEUCHOS_TEST_FOR_EXCEPTION(NodeComm.is_null(),
Exceptions::RuntimeError,
"MueLu::RepartitionHeuristicFactory::Build(): NodeComm is null.");
163 if (NodeComm()->getSize() != map->getComm()->getSize()) {
164 GetOStream(
Statistics1) <<
"Repartitioning? YES: \n Within node only" << std::endl;
165 int nodeRank = NodeComm->getRank();
168 int isZero = (nodeRank == 0);
170 Teuchos::reduceAll(*map->getComm(), Teuchos::REDUCE_SUM, isZero, Teuchos::outArg(numNodes));
171 Set(currentLevel,
"number of partitions", numNodes);
179 <<
"\n current level = " << Teuchos::toString(currentLevel.
GetLevelID()) <<
", first level where repartitioning can happen is " + Teuchos::toString(startLevel) << std::endl;
182 Set(currentLevel,
"number of partitions", -1);
187 RCP<const Teuchos::Comm<int> > origComm = map->getComm();
188 RCP<const Teuchos::Comm<int> > comm = origComm;
196 if (comm->getSize() == 1 && Teuchos::rcp_dynamic_cast<const RAPFactory>(Afact) != Teuchos::null) {
198 <<
"\n comm size = 1" << std::endl;
200 Set(currentLevel,
"number of partitions", -1);
204 int numActiveProcesses = 0;
205 MueLu_sumAll(comm, Teuchos::as<int>((map->getLocalNumElements() > 0) ? 1 : 0), numActiveProcesses);
207 if (numActiveProcesses == 1) {
209 <<
"\n # processes with rows = " << Teuchos::toString(numActiveProcesses) << std::endl;
211 Set(currentLevel,
"number of partitions", 1);
218 if (putOnSingleProc && map->getGlobalNumElements() < (Xpetra::global_size_t)putOnSingleProc) {
220 <<
"\n # rows is below the single-proc threshold = " << putOnSingleProc << std::endl;
222 Set(currentLevel,
"number of partitions", 1);
226 bool test3 =
false, test4 =
false;
227 std::string msg3, msg4;
231 if (minRowsPerProcess > 0) {
232 LO numMyRows = Teuchos::as<LO>(map->getLocalNumElements()), minNumRows, LOMAX = Teuchos::OrdinalTraits<LO>::max();
233 LO haveFewRows = (numMyRows < minRowsPerProcess ? 1 : 0), numWithFewRows = 0;
235 MueLu_minAll(comm, (numMyRows > 0 ? numMyRows : LOMAX), minNumRows);
240 if (numWithFewRows > 0)
243 msg3 =
"\n min # rows per proc = " + Teuchos::toString(minNumRows) +
", min allowable = " + Teuchos::toString(minRowsPerProcess);
251 GO minNnz, maxNnz, numMyNnz = Teuchos::as<GO>(A->getLocalNumEntries());
253 MueLu_minAll(comm, (numMyNnz > 0 ? numMyNnz : maxNnz), minNnz);
254 double imbalance = Teuchos::as<double>(maxNnz) / minNnz;
256 if (imbalance > nonzeroImbalance)
259 msg4 =
"\n nonzero imbalance = " + Teuchos::toString(imbalance) +
", max allowable = " + Teuchos::toString(nonzeroImbalance);
263 if (!test3 && !test4) {
264 GetOStream(
Statistics1) <<
"Repartitioning? NO:" << msg3 + msg4 << std::endl;
267 Set(currentLevel,
"number of partitions", -1);
271 GetOStream(
Statistics1) <<
"Repartitioning? YES:" << msg3 + msg4 << std::endl;
284 const auto globalNumRows = Teuchos::as<GO>(map->getGlobalNumElements());
285 int numPartitions = 1;
286 if (globalNumRows >= targetRowsPerProcess) {
288 numPartitions = std::max(Teuchos::as<int>(globalNumRows / targetRowsPerProcess), 1);
290 numPartitions = std::min(numPartitions, comm->getSize());
292 Set(currentLevel,
"number of partitions", numPartitions);
294 GetOStream(
Statistics1) <<
"Number of partitions to use = " << numPartitions << std::endl;