MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_PerfUtils_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_PERFUTILS_DEF_HPP
11#define MUELU_PERFUTILS_DEF_HPP
12
13#include <algorithm>
14#include <string>
15
16#ifdef HAVE_MPI
17#include <Teuchos_CommHelpers.hpp>
18#endif
19
20#include <Xpetra_Export.hpp>
21#include <Xpetra_Import.hpp>
22#include <Xpetra_Matrix.hpp>
23#include <Xpetra_CrsMatrixWrap.hpp>
24
26
27//#include "MueLu_Utilities.hpp"
28
29namespace MueLu {
30
31template <class Type>
32void calculateStats(Type& minVal, Type& maxVal, double& avgVal, double& devVal, int& minProc, int& maxProc, const RCP<const Teuchos::Comm<int> >& comm, int numActiveProcs, const Type& v) {
33 Type sumVal, sum2Val, v2 = v * v;
34 using MT = typename Teuchos::ScalarTraits<Type>::magnitudeType;
35 double zero = Teuchos::ScalarTraits<double>::zero();
36
37 MueLu_sumAll(comm, v, sumVal);
38 MueLu_sumAll(comm, v2, sum2Val);
39 MueLu_minAll(comm, v, minVal);
40 MueLu_maxAll(comm, v, maxVal);
41
42 int w;
43 w = (minVal == v) ? comm->getRank() : -1;
44 MueLu_maxAll(comm, w, maxProc);
45 w = (maxVal == v) ? comm->getRank() : -1;
46 MueLu_maxAll(comm, w, minProc);
47
48 avgVal = (numActiveProcs > 0 ? (as<double>(Teuchos::ScalarTraits<Type>::real(sumVal)) / numActiveProcs) : zero);
49 MT avgVal_MT = Teuchos::as<MT>(avgVal);
50 devVal = (numActiveProcs > 1 ? sqrt((as<double>(Teuchos::ScalarTraits<Type>::real(sum2Val - sumVal * avgVal_MT))) / (numActiveProcs - 1)) : zero);
51}
52
53template <class Type>
54std::string stringStats(const RCP<const Teuchos::Comm<int> >& comm, int numActiveProcs, const Type& v, RCP<ParameterList> paramList = Teuchos::null) {
55 Type minVal, maxVal;
56 double avgVal, devVal;
57 int minProc, maxProc;
58 calculateStats<Type>(minVal, maxVal, avgVal, devVal, minProc, maxProc, comm, numActiveProcs, v);
59
60 const double zero = Teuchos::ScalarTraits<double>::zero();
61 const double one = Teuchos::ScalarTraits<double>::one();
62 std::ostringstream buf;
63 buf << std::fixed;
64 if ((avgVal != zero) && (paramList.is_null() || !paramList->isParameter("print abs") || paramList->get<bool>("print abs") == false)) {
65 double relDev = (devVal / avgVal) * 100;
66 double relMin = (as<double>(Teuchos::ScalarTraits<Type>::real(minVal)) / avgVal - one) * 100;
67 double relMax = (as<double>(Teuchos::ScalarTraits<Type>::real(maxVal)) / avgVal - one) * 100;
68 buf << "avg = " << std::scientific << std::setw(10) << std::setprecision(2) << avgVal << ", "
69 << "dev = " << std::fixed << std::setw(6) << std::setprecision(1) << relDev << "%, "
70 << "min = " << std::fixed << std::setw(7) << std::setprecision(1) << std::setw(7) << relMin << "%"
71 << " (" << std::scientific << std::setw(10) << std::setprecision(2) << minVal << " on " << std::fixed << std::setw(4) << minProc << "), "
72 << "max = " << std::fixed << std::setw(7) << std::setprecision(1) << relMax << "%"
73 << " (" << std::scientific << std::setw(10) << std::setprecision(2) << maxVal << " on " << std::fixed << std::setw(4) << maxProc << ")";
74 } else {
75 double relDev = (avgVal != zero ? (devVal / avgVal) * 100 : zero);
76 buf << "avg = " << std::scientific << std::setw(10) << std::setprecision(2) << avgVal << ", "
77 << "dev = " << std::fixed << std::setw(6) << std::setprecision(1) << relDev << "%, "
78 << "min = " << std::scientific << std::setw(10) << std::setprecision(2) << minVal
79 << " (on " << std::fixed << std::setw(4) << minProc << "), "
80 << "max = " << std::scientific << std::setw(10) << std::setprecision(2) << maxVal
81 << " (on " << std::fixed << std::setw(4) << maxProc << ")";
82 }
83 return buf.str();
84}
85
86template <class Map>
87bool cmp_less(typename Map::value_type& v1, typename Map::value_type& v2) {
88 return v1.second < v2.second;
89}
90
91template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
92std::string PerfUtils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::PrintMatrixInfo(const Matrix& A, const std::string& msgTag, RCP<const ParameterList> params) {
93 if (!CheckMatrix(A))
94 return "";
95
96 typedef Xpetra::global_size_t global_size_t;
97
98 std::ostringstream ss;
99
100 ss << msgTag << " size = " << A.getGlobalNumRows() << " x " << A.getGlobalNumCols();
101 if (A.haveGlobalConstants())
102 ss << ", nnz = " << A.getGlobalNumEntries();
103 ss << std::endl;
104
105 if (params.is_null())
106 return ss.str();
107
108 bool printLoadBalanceInfo = false, printCommInfo = false, printEntryStats = false;
109 if (params->isParameter("printLoadBalancingInfo") && params->get<bool>("printLoadBalancingInfo"))
110 printLoadBalanceInfo = true;
111 if (params->isParameter("printCommInfo") && params->get<bool>("printCommInfo"))
112 printCommInfo = true;
113 if (params->isParameter("printEntryStats") && params->get<bool>("printEntryStats"))
114 printEntryStats = true;
115
116 if (!printLoadBalanceInfo && !printCommInfo && !printEntryStats)
117 return ss.str();
118
119 RCP<const Import> importer = A.getCrsGraph()->getImporter();
120 RCP<const Export> exporter = A.getCrsGraph()->getExporter();
121
122 size_t numMyNnz = A.getLocalNumEntries(), numMyRows = A.getLocalNumRows();
123
124 // Create communicator only for active processes
125 RCP<const Teuchos::Comm<int> > origComm = A.getRowMap()->getComm();
126 bool activeProc = true;
127 int numProc = origComm->getSize();
128 int numActiveProcs = 0;
129#ifdef HAVE_MPI
130 RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(origComm);
131 MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
132
133 std::vector<size_t> numRowsPerProc(numProc);
134 Teuchos::gatherAll(*origComm, 1, &numMyRows, numProc, &numRowsPerProc[0]);
135
136 int root = 0;
137 bool rootFlag = true;
138 for (int i = 0; i < numProc; i++) {
139 if (numRowsPerProc[i]) {
140 ++numActiveProcs;
141 if (rootFlag) {
142 root = i;
143 rootFlag = false;
144 }
145 }
146 }
147
148 if (numMyRows == 0) {
149 activeProc = false;
150 numMyNnz = 0;
151 } // Reset numMyNnz to avoid adding it up in reduceAll
152#else
153 if (numMyRows == 0) {
154 // FIXME JJH 10-May-2017 Is there any case in serial where numMyRows would be zero?
155 // Reset numMyNnz to avoid adding it up in reduceAll
156 numActiveProcs = 0;
157 activeProc = false;
158 numMyNnz = 0;
159 } else {
160 numActiveProcs = 1;
161 }
162#endif
163
164 std::string outstr;
165 ParameterList absList;
166 absList.set("print abs", true);
167
168 RCP<const Matrix> rcpA = rcpFromRef(A);
169 RCP<const CrsMatrixWrap> crsWrapA = rcp_dynamic_cast<const Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node> >(rcpA);
170 RCP<const CrsMatrix> crsA;
171 if (!crsWrapA.is_null())
172 crsA = crsWrapA->getCrsMatrix();
173 if (printEntryStats && !crsA.is_null()) {
174 typedef Teuchos::ScalarTraits<Scalar> STS;
175 typedef typename STS::magnitudeType magnitudeType;
176 typedef Teuchos::ScalarTraits<magnitudeType> MTS;
177 ArrayRCP<const size_t> rowptr_RCP;
178 ArrayRCP<const LocalOrdinal> colind_RCP;
179 ArrayRCP<const Scalar> vals_RCP;
180 ArrayRCP<size_t> offsets_RCP;
181 ArrayView<const size_t> rowptr;
182 ArrayView<const Scalar> vals;
183 ArrayView<size_t> offsets;
184
185 crsA->getAllValues(rowptr_RCP, colind_RCP, vals_RCP);
186 crsA->getLocalDiagOffsets(offsets_RCP);
187 rowptr = rowptr_RCP();
188 vals = vals_RCP();
189 offsets = offsets_RCP();
190
191 Scalar val, minVal, maxVal;
192 magnitudeType absVal, minAbsVal, maxAbsVal;
193 {
194 minVal = STS::rmax();
195 maxVal = STS::rmin();
196 minAbsVal = MTS::rmax();
197 maxAbsVal = MTS::zero();
198
199 for (int i = 0; i < offsets.size(); i++) {
200 val = vals[rowptr[i] + offsets[i]];
201 if (STS::real(val) < STS::real(minVal))
202 minVal = val;
203 if (STS::real(val) > STS::real(maxVal))
204 maxVal = val;
205 absVal = STS::magnitude(val);
206 minAbsVal = std::min(minAbsVal, absVal);
207 maxAbsVal = std::max(maxAbsVal, absVal);
208 }
209
210 ss << msgTag << " diag min : " << stringStats<Scalar>(origComm, numActiveProcs, minVal) << std::endl;
211 ss << msgTag << " diag max : " << stringStats<Scalar>(origComm, numActiveProcs, maxVal) << std::endl;
212 ss << msgTag << " abs(diag) min : " << stringStats<Scalar>(origComm, numActiveProcs, minAbsVal) << std::endl;
213 ss << msgTag << " abs(diag) max : " << stringStats<Scalar>(origComm, numActiveProcs, maxAbsVal) << std::endl;
214 }
215
216 {
217 minVal = STS::rmax();
218 maxVal = STS::rmin();
219 minAbsVal = MTS::rmax();
220 maxAbsVal = MTS::zero();
221
222 for (int i = 0; i < vals.size(); i++) {
223 val = vals[i];
224 if (STS::real(val) < STS::real(minVal))
225 minVal = val;
226 if (STS::real(val) > STS::real(maxVal))
227 maxVal = val;
228 absVal = STS::magnitude(val);
229 minAbsVal = std::min(minAbsVal, absVal);
230 maxAbsVal = std::max(maxAbsVal, absVal);
231 }
232
233 ss << msgTag << " entry min : " << stringStats<Scalar>(origComm, numActiveProcs, minVal) << std::endl;
234 ss << msgTag << " entry max : " << stringStats<Scalar>(origComm, numActiveProcs, maxVal) << std::endl;
235 ss << msgTag << " abs(entry) min : " << stringStats<Scalar>(origComm, numActiveProcs, minAbsVal) << std::endl;
236 ss << msgTag << " abs(entry) max : " << stringStats<Scalar>(origComm, numActiveProcs, maxAbsVal) << std::endl;
237 }
238 }
239
240 if (printLoadBalanceInfo) {
241 ss << msgTag << " Load balancing info" << std::endl;
242 ss << msgTag << " # active processes: " << numActiveProcs << "/" << numProc << std::endl;
243 ss << msgTag << " # rows per proc : " << stringStats<global_size_t>(origComm, numActiveProcs, numMyRows) << std::endl;
244 ss << msgTag << " # nnz per proc : " << stringStats<global_size_t>(origComm, numActiveProcs, numMyNnz) << std::endl;
245 }
246
247 if (printCommInfo && numActiveProcs != 1) {
248 typedef std::map<int, size_t> map_type;
249 map_type neighMap;
250 if (!importer.is_null()) {
251 ArrayView<const int> exportPIDs = importer->getExportPIDs();
252 if (exportPIDs.size())
253 for (int i = 0; i < exportPIDs.size(); i++)
254 neighMap[exportPIDs[i]]++;
255 }
256
257 // Communication volume
258 size_t numExportSend = 0;
259 size_t numImportSend = 0;
260 size_t numMsgs = 0;
261 size_t minMsg = 0;
262 size_t maxMsg = 0;
263
264 if (activeProc) {
265 numExportSend = (!exporter.is_null() ? exporter->getNumExportIDs() : 0);
266 numImportSend = (!importer.is_null() ? importer->getNumExportIDs() : 0);
267 numMsgs = neighMap.size();
268 map_type::const_iterator it = std::min_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
269 minMsg = (it != neighMap.end() ? it->second : 0);
270 it = std::max_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
271 maxMsg = (it != neighMap.end() ? it->second : 0);
272 }
273
274 ss << msgTag << " Communication info" << std::endl;
275 ss << msgTag << " # num export send : " << stringStats<global_size_t>(origComm, numActiveProcs, numExportSend) << std::endl;
276 ss << msgTag << " # num import send : " << stringStats<global_size_t>(origComm, numActiveProcs, numImportSend) << std::endl;
277 ss << msgTag << " # num msgs : " << stringStats<global_size_t>(origComm, numActiveProcs, numMsgs, rcpFromRef(absList)) << std::endl;
278 ss << msgTag << " # min msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, minMsg) << std::endl;
279 ss << msgTag << " # max msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, maxMsg) << std::endl;
280 }
281
282 outstr = ss.str();
283
284#ifdef HAVE_MPI
285 int strLength = outstr.size();
286 MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
287 if (origComm->getRank() != root)
288 outstr.resize(strLength);
289 MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
290#endif
291
292 return outstr;
293}
294
295template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
296std::string PerfUtils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::PrintImporterInfo(RCP<const Import> importer, const std::string& msgTag) {
297 typedef Xpetra::global_size_t global_size_t;
298
299 std::ostringstream ss;
300
301 // Create communicator only for active processes
302 RCP<const Teuchos::Comm<int> > origComm = importer->getSourceMap()->getComm();
303 bool activeProc = true;
304 int numActiveProcs = origComm->getSize();
305#ifdef HAVE_MPI
306 RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(origComm);
307 MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
308 int root = 0;
309#endif
310
311 std::string outstr;
312 ParameterList absList;
313 absList.set("print abs", true);
314
315 typedef std::map<int, size_t> map_type;
316 map_type neighMap;
317 ArrayView<const int> exportPIDs = importer->getExportPIDs();
318 if (exportPIDs.size())
319 for (int i = 0; i < exportPIDs.size(); i++)
320 neighMap[exportPIDs[i]]++;
321
322 // Communication volume
323 size_t numImportSend = 0;
324 size_t numMsgs = 0;
325 size_t minMsg = 0;
326 size_t maxMsg = 0;
327
328 if (activeProc) {
329 numImportSend = importer->getNumExportIDs();
330 numMsgs = neighMap.size();
331 map_type::const_iterator it = std::min_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
332 minMsg = (it != neighMap.end() ? it->second : 0);
333 it = std::max_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
334 maxMsg = (it != neighMap.end() ? it->second : 0);
335 }
336
337 ss << msgTag << " Communication info" << std::endl;
338 ss << msgTag << " # num import send : " << stringStats<global_size_t>(origComm, numActiveProcs, numImportSend) << std::endl;
339 ss << msgTag << " # num msgs : " << stringStats<global_size_t>(origComm, numActiveProcs, numMsgs, rcpFromRef(absList)) << std::endl;
340 ss << msgTag << " # min msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, minMsg) << std::endl;
341 ss << msgTag << " # max msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, maxMsg) << std::endl;
342
343 outstr = ss.str();
344
345#ifdef HAVE_MPI
346 int strLength = outstr.size();
347 MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
348 if (origComm->getRank() != root)
349 outstr.resize(strLength);
350 MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
351#endif
352
353 return outstr;
354}
355
356template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
357std::string PerfUtils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::CommPattern(const Matrix& A, const std::string& msgTag, RCP<const ParameterList> /* params */) {
358 if (!CheckMatrix(A))
359 return "";
360
361 std::ostringstream out;
362
363 RCP<const Teuchos::Comm<int> > comm = A.getRowMap()->getComm();
364 int myRank = comm->getRank();
365
366 out << msgTag << " " << myRank << ":";
367
368 RCP<const Import> importer = (A.getCrsGraph() != Teuchos::null ? A.getCrsGraph()->getImporter() : Teuchos::null);
369 if (importer.is_null()) {
370 out << std::endl;
371 return out.str();
372 }
373
374 ArrayView<const int> exportPIDs = importer->getExportPIDs();
375
376 if (exportPIDs.size()) {
377 // NodeTE: exportPIDs is sorted but not unique ( 1 1 1 2 2 3 4 4 4 )
378 int neigh = exportPIDs[0];
379 GO weight = 1;
380 for (int i = 1; i < exportPIDs.size(); i++) {
381 if (exportPIDs[i] != exportPIDs[i - 1]) {
382 out << " " << neigh << "(" << weight << ")";
383
384 neigh = exportPIDs[i];
385 weight = 1;
386
387 } else {
388 weight += 1;
389 }
390 }
391 out << " " << neigh << "(" << weight << ")" << std::endl;
392 }
393
394 return out.str();
395}
396
397template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
399 // We can only print statistics for matrices that have a crs graph. A
400 // potential issue is regarding Xpetra::TpetraBlockCrsMatrix which has no
401 // CrsGraph. It is held as a private data member by Xpetra::CrsMatrix,
402 // which itself is an Xpetra::Matrix. So we check directly whether the
403 // request for the graph throws.
404 bool hasCrsGraph = true;
405 try {
406 A.getCrsGraph();
407
408 } catch (...) {
409 hasCrsGraph = false;
410 }
411
412 return hasCrsGraph;
413}
414
415} // namespace MueLu
416
417#endif // MUELU_PERFUTILS_DEF_HPP
#define MueLu_maxAll(rcpComm, in, out)
#define MueLu_sumAll(rcpComm, in, out)
#define MueLu_minAll(rcpComm, in, out)
MueLu::DefaultScalar Scalar
static bool CheckMatrix(const Matrix &A)
static std::string PrintImporterInfo(RCP< const Import > importer, const std::string &msgTag)
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
static std::string CommPattern(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
Namespace for MueLu classes and methods.
bool cmp_less(typename Map::value_type &v1, typename Map::value_type &v2)
std::string stringStats(const RCP< const Teuchos::Comm< int > > &comm, int numActiveProcs, const Type &v, RCP< ParameterList > paramList=Teuchos::null)
void calculateStats(Type &minVal, Type &maxVal, double &avgVal, double &devVal, int &minProc, int &maxProc, const RCP< const Teuchos::Comm< int > > &comm, int numActiveProcs, const Type &v)