Teuchos - Trilinos Tools Package Version of the Day
Loading...
Searching...
No Matches
Teuchos_TimeMonitor.cpp
1// @HEADER
2// *****************************************************************************
3// Teuchos: Common Tools Package
4//
5// Copyright 2004 NTESS and the Teuchos contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
11#include "Teuchos_CommHelpers.hpp"
12#include "Teuchos_DefaultComm.hpp"
15#include "Teuchos_StandardParameterEntryValidators.hpp"
17#include "Teuchos_StackedTimer.hpp"
18
19#include <functional>
20#include <iomanip>
21#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
22#include <sstream>
23#endif
24
25namespace Teuchos {
78 template<class Ordinal, class ScalarType, class IndexType>
79 class MaxLoc :
80 public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
81 public:
82 void
83 reduce (const Ordinal count,
84 const std::pair<ScalarType, IndexType> inBuffer[],
85 std::pair<ScalarType, IndexType> inoutBuffer[]) const;
86 };
87
88 template<class Ordinal>
89 class MaxLoc<Ordinal, double, int> :
90 public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
91 public:
92 void
93 reduce (const Ordinal count,
94 const std::pair<double, int> inBuffer[],
95 std::pair<double, int> inoutBuffer[]) const
96 {
97 for (Ordinal ind = 0; ind < count; ++ind) {
98 const std::pair<double, int>& in = inBuffer[ind];
99 std::pair<double, int>& inout = inoutBuffer[ind];
100
101 if (in.first > inout.first) {
102 inout.first = in.first;
103 inout.second = in.second;
104 } else if (in.first < inout.first) {
105 // Don't need to do anything; inout has the values.
106 } else { // equal, or at least one is NaN.
107 inout.first = in.first;
108 inout.second = std::min (in.second, inout.second);
109 }
110 }
111 }
112 };
113
140 template<class Ordinal, class ScalarType, class IndexType>
141 class MinLoc :
142 public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
143 public:
144 void
145 reduce (const Ordinal count,
146 const std::pair<ScalarType, IndexType> inBuffer[],
147 std::pair<ScalarType, IndexType> inoutBuffer[]) const;
148 };
149
150 template<class Ordinal>
151 class MinLoc<Ordinal, double, int> :
152 public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
153 public:
154 void
155 reduce (const Ordinal count,
156 const std::pair<double, int> inBuffer[],
157 std::pair<double, int> inoutBuffer[]) const
158 {
159 for (Ordinal ind = 0; ind < count; ++ind) {
160 const std::pair<double, int>& in = inBuffer[ind];
161 std::pair<double, int>& inout = inoutBuffer[ind];
162
163 if (in.first < inout.first) {
164 inout.first = in.first;
165 inout.second = in.second;
166 } else if (in.first > inout.first) {
167 // Don't need to do anything; inout has the values.
168 } else { // equal, or at least one is NaN.
169 inout.first = in.first;
170 inout.second = std::min (in.second, inout.second);
171 }
172 }
173 }
174 };
175
179 template<class Ordinal, class ScalarType, class IndexType>
181 public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
182 public:
183 void
184 reduce (const Ordinal count,
185 const std::pair<ScalarType, IndexType> inBuffer[],
186 std::pair<ScalarType, IndexType> inoutBuffer[]) const;
187 };
188
189 template<class Ordinal>
190 class MinLocNonzero<Ordinal, double, int> :
191 public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
192 public:
193 void
194 reduce (const Ordinal count,
195 const std::pair<double, int> inBuffer[],
196 std::pair<double, int> inoutBuffer[]) const
197 {
198 for (Ordinal ind = 0; ind < count; ++ind) {
199 const std::pair<double, int>& in = inBuffer[ind];
200 std::pair<double, int>& inout = inoutBuffer[ind];
201
202 if ( (in.first < inout.first && in.first != 0) || (inout.first == 0 && in.first != 0) ) {
203 inout.first = in.first;
204 inout.second = in.second;
205 } else if (in.first > inout.first) {
206 // Don't need to do anything; inout has the values.
207 } else { // equal, or at least one is NaN.
208 inout.first = in.first;
209 inout.second = std::min (in.second, inout.second);
210 }
211 }
212 }
213 };
214
215 constexpr const std::string_view defaultStackedTimerName = "Teuchos::StackedTimer";
216
217 // Typedef used internally by TimeMonitor::summarize() and its
218 // helper functions. The map is keyed on timer label (a string).
219 // Each value is a pair: (total number of seconds over all calls to
220 // that timer, total number of calls to that timer).
221 typedef std::map<std::string, std::pair<double, int> > timer_map_t;
222
223 // static initialization
225
228 {
229 if (!isRecursiveCall()) {
230 counter().start(reset);
231#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
233 stackedTimer_->start(counter().name(),false);
234#endif
235 }
236 }
237
239 if (!isRecursiveCall()) {
240 counter().stop();
241#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
242 try {
244 stackedTimer_->stop(counter().name(),false);
245 }
246 catch (std::runtime_error& e) {
247 std::ostringstream warning;
248 warning <<
249 "\n*********************************************************************\n"
250 "WARNING: Overlapping timers detected! Near: " <<counter().name()<<"\n"
251 "A TimeMonitor timer was stopped before a nested subtimer was\n"
252 "stopped. This is not allowed by the StackedTimer. This corner case\n"
253 "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
254 "assigned to a new timer. To disable this warning, either fix the\n"
255 "ordering of timer creation and destuction or disable the StackedTimer\n"
256 "support in the TimeMonitor by setting the StackedTimer to null\n"
257 "Example:\n"
258 " RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(\"Junk\"))));\n"
259 "///code to time \n"
260 "MM = Teuchos::null;\n"
261 "MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(\"SecondJunk\"))));\n"
262 "*********************************************************************\n";
263 std::cout << warning.str() << std::endl << e.what() << std::endl;
265 }
266#endif
267 }
268 }
269
270 void
271 TimeMonitor::disableTimer (const std::string& name)
272 {
275 timer == null, std::invalid_argument,
276 "TimeMonitor::disableTimer: Invalid timer \"" << name << "\"");
277 timer->disable ();
278 }
279
280 void
281 TimeMonitor::enableTimer (const std::string& name)
282 {
285 timer == null, std::invalid_argument,
286 "TimeMonitor::enableTimer: Invalid timer \"" << name << "\"");
287 timer->enable ();
288 }
289
290 void
292 {
293 typedef std::map<std::string, RCP<Time> > map_type;
294 typedef map_type::iterator iter_type;
295 map_type& ctrs = counters ();
296
297 // In debug mode, loop first to check whether any of the timers
298 // are running, before resetting them. This ensures that this
299 // method satisfies the strong exception guarantee (either it
300 // completes normally, or there are no side effects).
301#ifdef TEUCHOS_DEBUG
302 for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
303 // We throw a runtime_error rather than a logic_error, because
304 // logic_error suggests a bug in the implementation of
305 // TimeMonitor. Calling zeroOutTimers() when a timer is running
306 // is not TimeMonitor's fault.
308 it->second->isRunning (), std::runtime_error,
309 "Timer \"" << it->second->name () << "\" is currently running. "
310 "You are not allowed to reset running timers.");
311 }
312#endif // TEUCHOS_DEBUG
313
314 for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
315 it->second->reset ();
316 }
317 }
318
319 // An anonymous namespace is the standard way of limiting linkage of
320 // its contained routines to file scope.
321 namespace {
322 // \brief Return an "empty" local timer datum.
323 //
324 // "Empty" means the datum has zero elapsed time and zero call
325 // count. This function does not actually create a timer.
326 //
327 // \param name The timer's name.
328 std::pair<std::string, std::pair<double, int> >
329 makeEmptyTimerDatum (const std::string& name)
330 {
331 return std::make_pair (name, std::make_pair (double(0), int(0)));
332 }
333
334 // \fn collectLocalTimerData
335 // \brief Collect and sort local timer data by timer names.
336 //
337 // \param localData [out] Map whose keys are the timer names, and
338 // whose value for each key is the total elapsed time (in
339 // seconds) and the call count for the timer with that name.
340 //
341 // \param localCounters [in] Timers from which to extract data.
342 //
343 // \param filter [in] Filter for timer labels. If filter is not
344 // empty, this method will only collect data for local timers
345 // whose labels begin with this string.
346 //
347 // Extract the total elapsed time and call count from each timer
348 // in the given array. Merge results for timers with duplicate
349 // labels, by summing their total elapsed times and call counts
350 // pairwise.
351 void
352 collectLocalTimerData (timer_map_t& localData,
353 const std::map<std::string, RCP<Time> >& localCounters,
354 const std::string& filter="")
355 {
356 using std::make_pair;
357 typedef timer_map_t::iterator iter_t;
358
359 timer_map_t theLocalData;
360 for (std::map<std::string, RCP<Time> >::const_iterator it = localCounters.begin();
361 it != localCounters.end(); ++it) {
362 const std::string& name = it->second->name ();
363
364 // Filter current timer name, if provided filter is nonempty.
365 // Filter string must _start_ the timer label, not just be in it.
366 const bool skipThisOne = (filter != "" && name.find (filter) != 0);
367 if (! skipThisOne) {
368 const double timing = it->second->totalElapsedTime ();
369 const int numCalls = it->second->numCalls ();
370
371 // Merge timers with duplicate labels, by summing their
372 // total elapsed times and call counts.
373 iter_t loc = theLocalData.find (name);
374 if (loc == theLocalData.end()) {
375 // Use loc as an insertion location hint.
376 theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls)));
377 }
378 else {
379 loc->second.first += timing;
380 loc->second.second += numCalls;
381 }
382 }
383 }
384 // This avoids copying the map, and also makes this method
385 // satisfy the strong exception guarantee.
386 localData.swap (theLocalData);
387 }
388
389 // \brief Locally filter out timer data with zero call counts.
390 //
391 // \param timerData [in/out]
392 void
393 filterZeroData (timer_map_t& timerData)
394 {
395 // FIXME (mfh 15 Mar 2013) Should use std::map::erase with
396 // iterator hint, instead of rebuilding the map completely.
397 timer_map_t newTimerData;
398 for (timer_map_t::const_iterator it = timerData.begin();
399 it != timerData.end(); ++it) {
400 if (it->second.second > 0) {
401 newTimerData[it->first] = it->second;
402 }
403 }
404 timerData.swap (newTimerData);
405 }
406
428 void
429 collectLocalTimerDataAndNames (timer_map_t& localTimerData,
430 Array<std::string>& localTimerNames,
431 const std::map<std::string, RCP<Time> >& localTimers,
432 const bool writeZeroTimers,
433 const std::string& filter="")
434 {
435 // Collect and sort local timer data by timer names.
436 collectLocalTimerData (localTimerData, localTimers, filter);
437
438 // Filter out zero data locally first. This ensures that if we
439 // are writing global stats, and if a timer name exists in the
440 // set of global names, then that timer has a nonzero call count
441 // on at least one MPI process.
442 if (! writeZeroTimers) {
443 filterZeroData (localTimerData);
444 }
445
446 // Extract the set of local timer names. The std::map keeps
447 // them sorted alphabetically.
448 localTimerNames.reserve (localTimerData.size());
449 for (timer_map_t::const_iterator it = localTimerData.begin();
450 it != localTimerData.end(); ++it) {
451 localTimerNames.push_back (it->first);
452 }
453 }
454
489 void
490 collectGlobalTimerData (timer_map_t& globalTimerData,
491 Array<std::string>& globalTimerNames,
492 timer_map_t& localTimerData,
493 Array<std::string>& localTimerNames,
494 Ptr<const Comm<int> > comm,
495 const bool alwaysWriteLocal,
496 const ECounterSetOp setOp)
497 {
498 // There may be some global timers that are not local timers on
499 // the calling MPI process(es). In that case, if
500 // alwaysWriteLocal is true, then we need to fill in the
501 // "missing" local timers. That will ensure that both global
502 // and local timer columns in the output table have the same
503 // number of rows. The collectLocalTimerDataAndNames() method
504 // may have already filtered out local timers with zero call
505 // counts (if its writeZeroTimers argument was false), but we
506 // won't be filtering again. Thus, any local timer data we
507 // insert here won't get filtered out.
508 //
509 // Note that calling summarize() with writeZeroTimers == false
510 // will still do what it says, even if we insert local timers
511 // with zero call counts here.
512
513 // This does the correct and inexpensive thing (just copies the
514 // timer data) if numProcs == 1. Otherwise, it initiates a
515 // communication with \f$O(\log P)\f$ messages along the
516 // critical path, where \f$P\f$ is the number of participating
517 // processes.
518 mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp);
519
520#ifdef TEUCHOS_DEBUG
521 {
522 // Sanity check that all processes have the name number of
523 // global timer names.
524 const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size();
525 timer_map_t::size_type minNumGlobalNames = 0;
526 timer_map_t::size_type maxNumGlobalNames = 0;
527 reduceAll (*comm, REDUCE_MIN, myNumGlobalNames,
528 outArg (minNumGlobalNames));
529 reduceAll (*comm, REDUCE_MAX, myNumGlobalNames,
530 outArg (maxNumGlobalNames));
531 TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames,
532 std::logic_error, "Min # global timer names = " << minNumGlobalNames
533 << " != max # global timer names = " << maxNumGlobalNames
534 << ". Please report this bug to the Teuchos developers.");
535 TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames,
536 std::logic_error, "My # global timer names = " << myNumGlobalNames
537 << " != min # global timer names = " << minNumGlobalNames
538 << ". Please report this bug to the Teuchos developers.");
539 }
540#endif // TEUCHOS_DEBUG
541
542 // mergeCounterNames() just merges the counters' names, not
543 // their actual data. Now we need to fill globalTimerData with
544 // this process' timer data for the timers in globalTimerNames.
545 //
546 // All processes need the full list of global timers, since
547 // there may be some global timers that are not local timers.
548 // That's why mergeCounterNames() has to be an all-reduce, not
549 // just a reduction to Proc 0.
550 //
551 // Insertion optimization: if the iterator given to map::insert
552 // points right before where we want to insert, insertion is
553 // O(1). globalTimerNames is sorted, so feeding the iterator
554 // output of map::insert into the next invocation's input should
555 // make the whole insertion O(N) where N is the number of
556 // entries in globalTimerNames.
557 timer_map_t::iterator globalMapIter = globalTimerData.begin();
558 timer_map_t::iterator localMapIter;
559 for (Array<string>::const_iterator it = globalTimerNames.begin();
560 it != globalTimerNames.end(); ++it) {
561 const std::string& globalName = *it;
562 localMapIter = localTimerData.find (globalName);
563
564 if (localMapIter == localTimerData.end()) {
565 if (alwaysWriteLocal) {
566 // If there are some global timers that are not local
567 // timers, and if we want to print local timers, we insert
568 // a local timer datum with zero elapsed time and zero
569 // call count into localTimerData as well. This will
570 // ensure that both global and local timer columns in the
571 // output table have the same number of rows.
572 //
573 // We really only need to do this on Proc 0, which is the
574 // only process that currently may print local timers.
575 // However, we do it on all processes, just in case
576 // someone later wants to modify this function to print
577 // out local timer data for some process other than Proc
578 // 0. This extra computation won't affect the cost along
579 // the critical path, for future computations in which
580 // Proc 0 participates.
581 localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName));
582
583 // Make sure the missing global name gets added to the
584 // list of local names. We'll re-sort the list of local
585 // names below.
586 localTimerNames.push_back (globalName);
587 }
588 // There's a global timer that's not a local timer. Add it
589 // to our pre-merge version of the global timer data so that
590 // we can safely merge the global timer data later.
591 globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName));
592 }
593 else {
594 // We have this global timer name in our local timer list.
595 // Fill in our pre-merge version of the global timer data
596 // with our local data.
597 globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second));
598 }
599 }
600
601 if (alwaysWriteLocal) {
602 // Re-sort the list of local timer names, since we may have
603 // inserted "missing" names above.
604 std::sort (localTimerNames.begin(), localTimerNames.end());
605 }
606
607#ifdef TEUCHOS_DEBUG
608 {
609 // Sanity check that all processes have the name number of
610 // global timers.
611 const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size();
612 timer_map_t::size_type minNumGlobalTimers = 0;
613 timer_map_t::size_type maxNumGlobalTimers = 0;
614 reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers,
615 outArg (minNumGlobalTimers));
616 reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers,
617 outArg (maxNumGlobalTimers));
618 TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers,
619 std::logic_error, "Min # global timers = " << minNumGlobalTimers
620 << " != max # global timers = " << maxNumGlobalTimers
621 << ". Please report this bug to the Teuchos developers.");
622 TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers,
623 std::logic_error, "My # global timers = " << myNumGlobalTimers
624 << " != min # global timers = " << minNumGlobalTimers
625 << ". Please report this bug to the Teuchos developers.");
626 }
627#endif // TEUCHOS_DEBUG
628 }
629
676 void
677 computeGlobalTimerStats (stat_map_type& statData,
678 std::vector<std::string>& statNames,
679 Ptr<const Comm<int> > comm,
680 const timer_map_t& globalTimerData,
681 const bool ignoreZeroTimers)
682 {
684
685 const int numTimers = static_cast<int> (globalTimerData.size());
686 const int numProcs = comm->getSize();
687
688 // Extract pre-reduction timings and call counts into a
689 // sequential array. This array will be in the same order as
690 // the global timer names are in the map.
691 Array<std::pair<double, int> > timingsAndCallCounts;
692 timingsAndCallCounts.reserve (numTimers);
693 for (timer_map_t::const_iterator it = globalTimerData.begin();
694 it != globalTimerData.end(); ++it) {
695 timingsAndCallCounts.push_back (it->second);
696 }
697
698 // For each timer name, compute the min timing and its
699 // corresponding call count. If two processes have the same
700 // timing but different call counts, the minimum call count will
701 // be used.
702 Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers);
703 if (numTimers > 0) {
704 if (ignoreZeroTimers)
705 reduceAll (*comm, MinLocNonzero<int, double, int>(), numTimers,
706 &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
707 else
708 reduceAll (*comm, MinLoc<int, double, int>(), numTimers,
709 &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
710 }
711
712 // For each timer name, compute the max timing and its
713 // corresponding call count. If two processes have the same
714 // timing but different call counts, the minimum call count will
715 // be used.
716 Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers);
717 if (numTimers > 0) {
718 reduceAll (*comm, MaxLoc<int, double, int>(), numTimers,
719 &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]);
720 }
721
722 // For each timer name, compute the mean-over-processes timing,
723 // the mean call count, and the mean-over-call-counts timing.
724 // The mean call count is reported as a double to allow a
725 // fractional value.
726 //
727 // Each local timing is really the total timing over all local
728 // invocations. The number of local invocations is the call
729 // count. Thus, the mean-over-call-counts timing is the sum of
730 // all the timings (over all processes), divided by the sum of
731 // all the call counts (over all processes). We compute it in a
732 // different way to over unnecessary overflow.
733 Array<double> meanOverCallCountsTimings (numTimers);
734 Array<double> meanOverProcsTimings (numTimers);
735 Array<double> meanCallCounts (numTimers);
736 Array<int> ICallThisTimer (numTimers);
737 Array<int> numProcsCallingEachTimer (numTimers);
738 {
739 // Figure out how many processors actually call each timer.
740 if (ignoreZeroTimers) {
741 for (int k = 0; k < numTimers; ++k) {
742 const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
743 if (callCount > 0) ICallThisTimer[k] = 1;
744 else ICallThisTimer[k] = 0;
745 }
746 if (numTimers > 0) {
747 reduceAll (*comm, REDUCE_SUM, numTimers, &ICallThisTimer[0],
748 &numProcsCallingEachTimer[0]);
749 }
750 }
751
752 // When summing, first scale by the number of processes. This
753 // avoids unnecessary overflow, and also gives us the mean
754 // call count automatically.
755 Array<double> scaledTimings (numTimers);
756 Array<double> scaledCallCounts (numTimers);
757 const double P = static_cast<double> (numProcs);
758
759 if (ignoreZeroTimers) {
760 for (int k = 0; k < numTimers; ++k) {
761 const double timing = timingsAndCallCounts[k].first;
762 const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
763
764 scaledTimings[k] = timing / numProcsCallingEachTimer[k];
765 scaledCallCounts[k] = callCount / numProcsCallingEachTimer[k];
766 }
767 }
768 else {
769 for (int k = 0; k < numTimers; ++k) {
770 const double timing = timingsAndCallCounts[k].first;
771 const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
772
773 scaledTimings[k] = timing / P;
774 scaledCallCounts[k] = callCount / P;
775 }
776 }
777
778 if (numTimers > 0) {
779 reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0],
780 &meanOverProcsTimings[0]);
781 reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0],
782 &meanCallCounts[0]);
783 }
784 // We don't have to undo the scaling for the mean timings;
785 // just divide by the scaled call count.
786 for (int k = 0; k < numTimers; ++k) {
787 if (meanCallCounts[k] > ScalarTraits<double>::zero ()) {
788 meanOverCallCountsTimings[k] = meanOverProcsTimings[k] / meanCallCounts[k];
789 }
790 else {
791 meanOverCallCountsTimings[k] = ScalarTraits<double>::zero ();
792 }
793 }
794 }
795
796 // Reformat the data into the map of statistics. Be sure that
797 // each value (the std::vector of (timing, call count) pairs,
798 // each entry of which is a different statistic) preserves the
799 // order of statNames.
800 statNames.resize (4);
801 statNames[0] = "MinOverProcs";
802 statNames[1] = "MeanOverProcs";
803 statNames[2] = "MaxOverProcs";
804 statNames[3] = "MeanOverCallCounts";
805
806 stat_map_type::iterator statIter = statData.end();
807 timer_map_t::const_iterator it = globalTimerData.begin();
808 for (int k = 0; it != globalTimerData.end(); ++k, ++it) {
809 std::vector<std::pair<double, double> > curData (4);
810 curData[0] = minTimingsAndCallCounts[k];
811 curData[1] = std::make_pair (meanOverProcsTimings[k], meanCallCounts[k]);
812 curData[2] = maxTimingsAndCallCounts[k];
813 curData[3] = std::make_pair (meanOverCallCountsTimings[k], meanCallCounts[k]);
814
815 // statIter gives an insertion location hint that makes each
816 // insertion O(1), since we remember the location of the last
817 // insertion.
818 statIter = statData.insert (statIter, std::make_pair (it->first, curData));
819 }
820 }
821
822
839 RCP<const Comm<int> >
840 getDefaultComm ()
841 {
842 // The default communicator. If Trilinos was built with MPI
843 // enabled, this should be MPI_COMM_WORLD. (If MPI has not yet
844 // been initialized, it's not valid to use the communicator!)
845 // Otherwise, this should be a "serial" (no MPI, one "process")
846 // communicator.
847 RCP<const Comm<int> > comm = DefaultComm<int>::getComm ();
848
849#ifdef HAVE_MPI
850 {
851 int mpiHasBeenStarted = 0;
852 MPI_Initialized (&mpiHasBeenStarted);
853 if (! mpiHasBeenStarted) {
854 // Make pComm a new "serial communicator."
855 comm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ()));
856 }
857 }
858#endif // HAVE_MPI
859 return comm;
860 }
861
862 } // namespace (anonymous)
863
864
865 void
867 std::vector<std::string>& statNames,
868 Ptr<const Comm<int> > comm,
869 const ECounterSetOp setOp,
870 const std::string& filter)
871 {
872 // Collect local timer data and names. Filter out timers with
873 // zero call counts if writeZeroTimers is false. Also, apply the
874 // timer label filter at this point, so we don't have to compute
875 // statistics on timers we don't want to display anyway.
876 timer_map_t localTimerData;
878 const bool writeZeroTimers = false;
881 // Merge the local timer data and names into global timer data and
882 // names.
883 timer_map_t globalTimerData;
885 const bool alwaysWriteLocal = false;
889 // Compute statistics on the data.
891 }
892
893
894 void
896 std::ostream& out,
897 const bool alwaysWriteLocal,
898 const bool writeGlobalStats,
899 const bool writeZeroTimers,
900 const ECounterSetOp setOp,
901 const std::string& filter,
902 const bool ignoreZeroTimers)
903 {
904 //
905 // We can't just call computeGlobalTimerStatistics(), since
906 // summarize() has different options that affect whether global
907 // statistics are computed and printed.
908 //
909 const int numProcs = comm->getSize();
910 const int myRank = comm->getRank();
911
912 // Collect local timer data and names. Filter out timers with
913 // zero call counts if writeZeroTimers is false. Also, apply the
914 // timer label filter at this point, so we don't have to compute
915 // statistics on timers we don't want to display anyway.
916 timer_map_t localTimerData;
920
921 // If we're computing global statistics, merge the local timer
922 // data and names into global timer data and names, and compute
923 // global timer statistics. Otherwise, leave the global data
924 // empty.
925 timer_map_t globalTimerData;
928 std::vector<std::string> statNames;
929 if (writeGlobalStats) {
933 // Compute statistics on the data, but only if the communicator
934 // contains more than one process. Otherwise, statistics don't
935 // make sense and we don't print them (see below).
936 if (numProcs > 1) {
938 }
939 }
940
941 // Precision of floating-point numbers in the table.
942 const int precision = format().precision();
943 const std::ios_base::fmtflags& flags = out.flags();
944
945 // All columns of the table, in order.
947
948 // Labels of all the columns of the table.
949 // We will append to this when we add each column.
951
952 // Widths (in number of characters) of each column.
953 // We will append to this when we add each column.
955
956 // Table column containing all timer names. If writeGlobalStats
957 // is true, we use the global timer names, otherwise we use the
958 // local timer names. We build the table on all processes
959 // redundantly, but only print on Rank 0.
960 {
961 titles.append ("Timer Name");
962
963 // The column labels depend on whether we are computing global statistics.
965 tableColumns.append (nameCol);
966
967 // Each column is as wide as it needs to be to hold both its
968 // title and all of the column data. This column's title is the
969 // current last entry of the titles array.
970 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol));
971 }
972
973 // Table column containing local timer stats, if applicable. We
974 // only write local stats if asked, only on MPI Proc 0, and only
975 // if there is more than one MPI process in the communicator
976 // (otherwise local stats == global stats, so we just print the
977 // global stats). In this case, we've padded the local data on
978 // Proc 0 if necessary to match the global timer list, so that the
979 // columns have the same number of rows.
980 if (alwaysWriteLocal && numProcs > 1 && myRank == 0) {
981 titles.append ("Local time (num calls)");
982
983 // Copy local timer data out of the array-of-structs into
984 // separate arrays, for display in the table.
987 for (timer_map_t::const_iterator it = localTimerData.begin();
988 it != localTimerData.end(); ++it) {
989 localTimings.push_back (it->second.first);
990 localNumCalls.push_back (static_cast<double> (it->second.second));
991 }
993 tableColumns.append (timeAndCalls);
994 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
995 }
996
997 if (writeGlobalStats) {
998 // If there's only 1 process in the communicator, don't display
999 // statistics; statistics don't make sense in that case. Just
1000 // display the timings and call counts. If there's more than 1
1001 // process, do display statistics.
1002 if (numProcs == 1) {
1003 // Extract timings and the call counts from globalTimerData.
1006 for (timer_map_t::const_iterator it = globalTimerData.begin();
1007 it != globalTimerData.end(); ++it) {
1008 globalTimings.push_back (it->second.first);
1009 globalNumCalls.push_back (static_cast<double> (it->second.second));
1010 }
1011 // Print the table column.
1012 titles.append ("Global time (num calls)");
1014 tableColumns.append (timeAndCalls);
1015 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1016 }
1017 else { // numProcs > 1
1018 // Print a table column for each statistic. statNames and
1019 // each value in statData use the same ordering, so we can
1020 // iterate over valid indices of statNames to display the
1021 // statistics in the right order.
1022 const timer_map_t::size_type numGlobalTimers = globalTimerData.size();
1023 for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) {
1024 // Extract lists of timings and their call counts for the
1025 // current statistic.
1028 stat_map_type::const_iterator it = statData.begin();
1029 for (int k = 0; it != statData.end(); ++it, ++k) {
1030 statTimings[k] = (it->second[statInd]).first;
1031 statCallCounts[k] = (it->second[statInd]).second;
1032 }
1033 // Print the table column.
1034 const std::string& statisticName = statNames[statInd];
1035 const std::string titleString = statisticName;
1036 titles.append (titleString);
1038 tableColumns.append (timeAndCalls);
1039 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1040 }
1041 }
1042 }
1043
1044 // Print the whole table to the given output stream on MPI Rank 0.
1045 format().setColumnWidths (columnWidths);
1046 if (myRank == 0) {
1047 std::ostringstream theTitle;
1048 theTitle << "TimeMonitor results over " << numProcs << " processor"
1049 << (numProcs > 1 ? "s" : "");
1050 format().writeWholeTable (out, theTitle.str(), titles, tableColumns);
1051 }
1052 }
1053
1054 void
1055 TimeMonitor::summarize (std::ostream &out,
1056 const bool alwaysWriteLocal,
1057 const bool writeGlobalStats,
1058 const bool writeZeroTimers,
1059 const ECounterSetOp setOp,
1060 const std::string& filter,
1061 const bool ignoreZeroTimers)
1062 {
1063 // The default communicator. If Trilinos was built with MPI
1064 // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1065 // be a "serial" (no MPI, one "process") communicator.
1067
1070 }
1071
1072 void
1074 std::vector<std::string>& statNames,
1075 const ECounterSetOp setOp,
1076 const std::string& filter)
1077 {
1078 // The default communicator. If Trilinos was built with MPI
1079 // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1080 // be a "serial" (no MPI, one "process") communicator.
1082
1084 }
1085
1087 : TimeMonitor(timer, reset), comm_(comm)
1088 { }
1089
1091 comm_->barrier();
1092 }
1093
1094
1095 namespace {
1119 std::string
1120 quoteLabelForYaml (const std::string& label)
1121 {
1122 // YAML allows empty keys in key: value pairs. See Section 7.2
1123 // of the YAML 1.2 spec. We thus let an empty label pass
1124 // through without quoting or other special treatment.
1125 if (label.empty ()) {
1126 return label;
1127 }
1128
1129 // Check whether the label is already quoted. If so, we don't
1130 // need to quote it again. However, we do need to quote any
1131 // quote symbols in the string inside the outer quotes.
1132 const bool alreadyQuoted = label.size () >= 2 &&
1133 label[0] == '"' && label[label.size() - 1] == '"';
1134
1135 // We need to quote if there are any colons or (inner) quotes in
1136 // the string. We'll determine this as we read through the
1137 // string and escape any characters that need escaping.
1138 bool needToQuote = false;
1139
1140 std::string out; // To fill with the return value
1141 out.reserve (label.size ());
1142
1143 const size_t startPos = alreadyQuoted ? 1 : 0;
1144 const size_t endPos = alreadyQuoted ? label.size () - 1 : label.size ();
1145 for (size_t i = startPos; i < endPos; ++i) {
1146 const char c = label[i];
1147 if (c == '"' || c == '\\') {
1148 out.push_back ('\\'); // Escape the quote or backslash.
1149 needToQuote = true;
1150 }
1151 else if (c == ':') {
1152 needToQuote = true;
1153 }
1154 out.push_back (c);
1155 }
1156
1157 if (needToQuote || alreadyQuoted) {
1158 // If the input string was already quoted, then out doesn't
1159 // include its quotes, so we have to add them back in.
1160 return "\"" + out + "\"";
1161 }
1162 else {
1163 return out;
1164 }
1165 }
1166
1167 } // namespace (anonymous)
1168
1169
1170 void TimeMonitor::
1171 summarizeToYaml (Ptr<const Comm<int> > comm,
1172 std::ostream &out,
1173 const ETimeMonitorYamlFormat yamlStyle,
1174 const std::string& filter)
1175 {
1177 using Teuchos::fancyOStream;
1178 using Teuchos::getFancyOStream;
1179 using Teuchos::OSTab;
1180 using Teuchos::RCP;
1181 using Teuchos::rcpFromRef;
1182 using std::endl;
1183 typedef std::vector<std::string>::size_type size_type;
1184
1185 const bool compact = (yamlStyle == YAML_FORMAT_COMPACT);
1186
1187 // const bool writeGlobalStats = true;
1188 // const bool writeZeroTimers = true;
1189 // const bool alwaysWriteLocal = false;
1190 const ECounterSetOp setOp = Intersection;
1191
1192 stat_map_type statData;
1193 std::vector<std::string> statNames;
1194 computeGlobalTimerStatistics (statData, statNames, comm, setOp, filter);
1195
1196 const int numProcs = comm->getSize();
1197
1198 // HACK (mfh 20 Aug 2012) For some reason, creating OSTab with "-
1199 // " as the line prefix does not work, else I would prefer that
1200 // method for printing each line of a YAML block sequence (see
1201 // Section 8.2.1 of the YAML 1.2 spec).
1202 //
1203 // Also, I have to set the tab indent string here, rather than in
1204 // OSTab's constructor. This is because line prefix (which for
1205 // some reason is what OSTab's constructor takes, rather than tab
1206 // indent string) means something different from tab indent
1207 // string, and turning on the line prefix prints all sorts of
1208 // things including "|" for some reason.
1209 RCP<FancyOStream> pfout = getFancyOStream (rcpFromRef (out));
1210 pfout->setTabIndentStr (" ");
1211 FancyOStream& fout = *pfout;
1212
1213 fout << "# Teuchos::TimeMonitor report" << endl
1214 << "---" << endl;
1215
1216 // mfh 19 Aug 2012: An important goal of our chosen output format
1217 // was to minimize the nesting depth. We have managed to keep the
1218 // nesting depth to 3, which is the limit that the current version
1219 // of PylotDB imposes for its YAML input.
1220
1221 // Outermost level is a dictionary. (Individual entries of a
1222 // dictionary do _not_ begin with "- ".) We always print the
1223 // outermost level in standard style, not flow style, for better
1224 // readability. We begin the outermost level with metadata.
1225 fout << "Output mode: " << (compact ? "compact" : "spacious") << endl
1226 << "Number of processes: " << numProcs << endl
1227 << "Time unit: s" << endl;
1228 // For a key: value pair where the value is a sequence or
1229 // dictionary on the following line, YAML requires a space after
1230 // the colon.
1231 fout << "Statistics collected: ";
1232 // Print list of the names of all the statistics we collected.
1233 if (compact) {
1234 fout << " [";
1235 for (size_type i = 0; i < statNames.size (); ++i) {
1236 fout << quoteLabelForYaml (statNames[i]);
1237 if (i + 1 < statNames.size ()) {
1238 fout << ", ";
1239 }
1240 }
1241 fout << "]" << endl;
1242 }
1243 else {
1244 fout << endl;
1245 OSTab tab1 (pfout);
1246 for (size_type i = 0; i < statNames.size (); ++i) {
1247 fout << "- " << quoteLabelForYaml (statNames[i]) << endl;
1248 }
1249 }
1250
1251 // Print the list of timer names.
1252 //
1253 // It might be nicer instead to print a map from timer name to all
1254 // of its data, but keeping the maximum nesting depth small
1255 // ensures better compatibility with different parsing tools.
1256 fout << "Timer names: ";
1257 if (compact) {
1258 fout << " [";
1259 size_type ind = 0;
1260 for (stat_map_type::const_iterator it = statData.begin();
1261 it != statData.end(); ++it, ++ind) {
1262 fout << quoteLabelForYaml (it->first);
1263 if (ind + 1 < statData.size ()) {
1264 fout << ", ";
1265 }
1266 }
1267 fout << "]" << endl;
1268 }
1269 else {
1270 fout << endl;
1271 OSTab tab1 (pfout);
1272 for (stat_map_type::const_iterator it = statData.begin();
1273 it != statData.end(); ++it) {
1274 fout << "- " << quoteLabelForYaml (it->first) << endl;
1275 }
1276 }
1277
1278 // Print times for each timer, as a map from statistic name to its time.
1279 fout << "Total times: ";
1280 if (compact) {
1281 fout << " {";
1282 size_type outerInd = 0;
1283 for (stat_map_type::const_iterator outerIter = statData.begin();
1284 outerIter != statData.end(); ++outerIter, ++outerInd) {
1285 // Print timer name.
1286 fout << quoteLabelForYaml (outerIter->first) << ": ";
1287 // Print that timer's data.
1288 const std::vector<std::pair<double, double> >& curData = outerIter->second;
1289 fout << "{";
1290 for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1291 fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1292 << curData[innerInd].first;
1293 if (innerInd + 1 < curData.size ()) {
1294 fout << ", ";
1295 }
1296 }
1297 fout << "}";
1298 if (outerInd + 1 < statData.size ()) {
1299 fout << ", ";
1300 }
1301 }
1302 fout << "}" << endl;
1303 }
1304 else {
1305 fout << endl;
1306 OSTab tab1 (pfout);
1307 size_type outerInd = 0;
1308 for (stat_map_type::const_iterator outerIter = statData.begin();
1309 outerIter != statData.end(); ++outerIter, ++outerInd) {
1310 // Print timer name.
1311 fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1312 // Print that timer's data.
1313 OSTab tab2 (pfout);
1314 const std::vector<std::pair<double, double> >& curData = outerIter->second;
1315 for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1316 fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1317 << curData[innerInd].first << endl;
1318 }
1319 }
1320 }
1321
1322 // Print call counts for each timer, for each statistic name.
1323 fout << "Call counts:";
1324 if (compact) {
1325 fout << " {";
1326 size_type outerInd = 0;
1327 for (stat_map_type::const_iterator outerIter = statData.begin();
1328 outerIter != statData.end(); ++outerIter, ++outerInd) {
1329 // Print timer name.
1330 fout << quoteLabelForYaml (outerIter->first) << ": ";
1331 // Print that timer's data.
1332 const std::vector<std::pair<double, double> >& curData = outerIter->second;
1333 fout << "{";
1334 for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1335 fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1336 << curData[innerInd].second;
1337 if (innerInd + 1 < curData.size ()) {
1338 fout << ", ";
1339 }
1340 }
1341 fout << "}";
1342 if (outerInd + 1 < statData.size ()) {
1343 fout << ", ";
1344 }
1345 }
1346 fout << "}" << endl;
1347 }
1348 else {
1349 fout << endl;
1350 OSTab tab1 (pfout);
1351 size_type outerInd = 0;
1352 for (stat_map_type::const_iterator outerIter = statData.begin();
1353 outerIter != statData.end(); ++outerIter, ++outerInd) {
1354 // Print timer name.
1355 fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1356 // Print that timer's data.
1357 OSTab tab2 (pfout);
1358 const std::vector<std::pair<double, double> >& curData = outerIter->second;
1359 for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1360 fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1361 << curData[innerInd].second << endl;
1362 }
1363 }
1364 }
1365 }
1366
1367 void TimeMonitor::
1368 summarizeToYaml (std::ostream &out,
1369 const ETimeMonitorYamlFormat yamlStyle,
1370 const std::string& filter)
1371 {
1372 // The default communicator. If Trilinos was built with MPI
1373 // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1374 // be a "serial" (no MPI, one "process") communicator.
1375 RCP<const Comm<int> > comm = getDefaultComm ();
1376
1377 summarizeToYaml (comm.ptr (), out, yamlStyle, filter);
1378 }
1379
1380 // Default value is false. We'll set to true once
1381 // setReportParameters() completes successfully.
1382 bool TimeMonitor::setParams_ = false;
1383
1384 // We have to declare all of these here in order to avoid linker errors.
1385 TimeMonitor::ETimeMonitorReportFormat TimeMonitor::reportFormat_ = TimeMonitor::REPORT_FORMAT_TABLE;
1386 TimeMonitor::ETimeMonitorYamlFormat TimeMonitor::yamlStyle_ = TimeMonitor::YAML_FORMAT_SPACIOUS;
1387 ECounterSetOp TimeMonitor::setOp_ = Intersection;
1388 bool TimeMonitor::alwaysWriteLocal_ = false;
1389 bool TimeMonitor::writeGlobalStats_ = true;
1390 bool TimeMonitor::writeZeroTimers_ = true;
1391
1392 void
1393 TimeMonitor::setReportFormatParameter (ParameterList& plist)
1394 {
1395 const std::string name ("Report format");
1396 const std::string defaultValue ("Table");
1397 const std::string docString ("Output format for report of timer statistics");
1398 Array<std::string> strings;
1399 Array<std::string> docs;
1400 Array<ETimeMonitorReportFormat> values;
1401
1402 strings.push_back ("YAML");
1403 docs.push_back ("YAML (see yaml.org) format");
1404 values.push_back (REPORT_FORMAT_YAML);
1405 strings.push_back ("Table");
1406 docs.push_back ("Tabular format via Teuchos::TableFormat");
1407 values.push_back (REPORT_FORMAT_TABLE);
1408
1409 setStringToIntegralParameter<ETimeMonitorReportFormat> (name, defaultValue,
1410 docString,
1411 strings (), docs (),
1412 values (), &plist);
1413 }
1414
1415 void
1416 TimeMonitor::setYamlFormatParameter (ParameterList& plist)
1417 {
1418 const std::string name ("YAML style");
1419 const std::string defaultValue ("spacious");
1420 const std::string docString ("YAML-specific output format");
1421 Array<std::string> strings;
1422 Array<std::string> docs;
1423 Array<ETimeMonitorYamlFormat> values;
1424
1425 strings.push_back ("compact");
1426 docs.push_back ("Compact format: use \"flow style\" (see YAML 1.2 spec at "
1427 "yaml.org) for most sequences except the outermost sequence");
1428 values.push_back (YAML_FORMAT_COMPACT);
1429
1430 strings.push_back ("spacious");
1431 docs.push_back ("Spacious format: avoid flow style");
1432 values.push_back (YAML_FORMAT_SPACIOUS);
1433
1434 setStringToIntegralParameter<ETimeMonitorYamlFormat> (name, defaultValue,
1435 docString,
1436 strings (), docs (),
1437 values (), &plist);
1438 }
1439
1440 void
1441 TimeMonitor::setSetOpParameter (ParameterList& plist)
1442 {
1443 const std::string name ("How to merge timer sets");
1444 const std::string defaultValue ("Intersection");
1445 const std::string docString ("How to merge differing sets of timers "
1446 "across processes");
1447 Array<std::string> strings;
1448 Array<std::string> docs;
1449 Array<ECounterSetOp> values;
1450
1451 strings.push_back ("Intersection");
1452 docs.push_back ("Compute intersection of timer sets over processes");
1453 values.push_back (Intersection);
1454 strings.push_back ("Union");
1455 docs.push_back ("Compute union of timer sets over processes");
1456 values.push_back (Union);
1457
1458 setStringToIntegralParameter<ECounterSetOp> (name, defaultValue, docString,
1459 strings (), docs (), values (),
1460 &plist);
1461 }
1462
1463 void
1468
1471 {
1472 return stackedTimer_;
1473 }
1474
1475 bool
1476 TimeMonitor::stackedTimerNameIsDefault() {
1477 return stackedTimer_.is_null() || (stackedTimer_->name() == defaultStackedTimerName);
1478 }
1479
1480 RCP<const ParameterList>
1482 {
1483 // Our implementation favors recomputation over persistent
1484 // storage. That is, we simply recreate the list every time we
1485 // need it.
1486 RCP<ParameterList> plist = parameterList ("TimeMonitor::report");
1487
1488 const bool alwaysWriteLocal = false;
1489 const bool writeGlobalStats = true;
1490 const bool writeZeroTimers = true;
1491
1492 setReportFormatParameter (*plist);
1493 setYamlFormatParameter (*plist);
1494 setSetOpParameter (*plist);
1495 plist->set ("alwaysWriteLocal", alwaysWriteLocal,
1496 "Always output local timers' values on Proc 0");
1497 plist->set ("writeGlobalStats", writeGlobalStats, "Always output global "
1498 "statistics, even if there is only one process in the "
1499 "communicator");
1500 plist->set ("writeZeroTimers", writeZeroTimers, "Generate output for "
1501 "timers that have never been called");
1502
1504 }
1505
1506 void
1507 TimeMonitor::setReportParameters (const RCP<ParameterList>& params)
1508 {
1509 ETimeMonitorReportFormat reportFormat = REPORT_FORMAT_TABLE;
1510 ETimeMonitorYamlFormat yamlStyle = YAML_FORMAT_SPACIOUS;
1511 ECounterSetOp setOp = Intersection;
1512 bool alwaysWriteLocal = false;
1513 bool writeGlobalStats = true;
1514 bool writeZeroTimers = true;
1515
1516 if (params.is_null ()) {
1517 // If we've set parameters before, leave their current values.
1518 // Otherwise, set defaults (below).
1519 if (setParams_) {
1520 return;
1521 }
1522 }
1523 else { // params is nonnull. Let's read it!
1524 params->validateParametersAndSetDefaults (*getValidReportParameters ());
1525
1526 reportFormat = getIntegralValue<ETimeMonitorReportFormat> (*params, "Report format");
1527 yamlStyle = getIntegralValue<ETimeMonitorYamlFormat> (*params, "YAML style");
1528 setOp = getIntegralValue<ECounterSetOp> (*params, "How to merge timer sets");
1529 alwaysWriteLocal = params->get<bool> ("alwaysWriteLocal");
1530 writeGlobalStats = params->get<bool> ("writeGlobalStats");
1531 writeZeroTimers = params->get<bool> ("writeZeroTimers");
1532 }
1533 // Defer setting state until here, to ensure the strong exception
1534 // guarantee for this method (either it throws with no externally
1535 // visible state changes, or it returns normally).
1536 reportFormat_ = reportFormat;
1537 yamlStyle_ = yamlStyle;
1538 setOp_ = setOp;
1539 alwaysWriteLocal_ = alwaysWriteLocal;
1540 writeGlobalStats_ = writeGlobalStats;
1541 writeZeroTimers_ = writeZeroTimers;
1542
1543 setParams_ = true; // Yay, we successfully set parameters!
1544 }
1545
1546 void
1548 std::ostream& out,
1549 const std::string& filter,
1551 {
1552 setReportParameters (params);
1553
1554 if (reportFormat_ == REPORT_FORMAT_YAML) {
1555 summarizeToYaml (comm, out, yamlStyle_, filter);
1556 }
1557 else if (reportFormat_ == REPORT_FORMAT_TABLE) {
1558 summarize (comm, out, alwaysWriteLocal_, writeGlobalStats_,
1559 writeZeroTimers_, setOp_, filter);
1560 }
1561 else {
1562 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TimeMonitor::report: "
1563 "Invalid report format. This should never happen; ParameterList "
1564 "validation should have caught this. Please report this bug to the "
1565 "Teuchos developers.");
1566 }
1567 }
1568
1569 void
1571 std::ostream& out,
1573 {
1574 report (comm, out, "", params);
1575 }
1576
1577 void
1578 TimeMonitor::report (std::ostream& out,
1579 const std::string& filter,
1581 {
1583 report (comm.ptr (), out, filter, params);
1584 }
1585
1586 void
1587 TimeMonitor::report (std::ostream& out,
1589 {
1591 report (comm.ptr (), out, "", params);
1592 }
1593
1594} // namespace Teuchos
Defines basic traits for the scalar field type.
A column of TableEntry objects.
Provides utilities for formatting tabular output.
Scope guard for Teuchos::Time, with MPI collective timer reporting.
std::vector< T >::const_iterator const_iterator
The type of a const forward iterator.
static Teuchos::RCP< const Comm< OrdinalType > > getComm()
Return the default global communicator.
Teuchos version of MPI_MAXLOC.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
same as MinLoc, but don't allow zero
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
Teuchos version of MPI_MINLOC.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
Common capabilities for collecting and reporting performance data across processors.
static TableFormat & format()
Table format that will be used to print a summary of timer results.
static std::map< std::string, RCP< Time > > & counters()
Array of all counters that were created with getNewCounter() on the calling (MPI) process.
static RCP< Time > lookupCounter(const std::string &name)
Return the first counter with the given name, or null if none.
const Time & counter() const
Constant access to the instance's counter reference.
bool isRecursiveCall() const
Whether we are currently in a recursive call of the counter.
Simple wrapper class for raw pointers to single objects where no persisting relationship exists.
Smart reference counting pointer class for automatic garbage collection.
void reset()
Reset to null.
bool is_null() const
Returns true if the underlying pointer is null.
Ptr< T > ptr() const
Get a safer wrapper raw C++ pointer to the underlying object.
T * get() const
Get the raw C++ pointer to the underlying object.
This class allows one to push and pop timers on and off a stack.
~SyncTimeMonitor() override
Destructor: stops the timer.
SyncTimeMonitor()=delete
Default constructor is deleted, since it would be unsafe.
Scope guard for Time, that can compute MPI collective timer statistics.
~TimeMonitor() override
Destructor: stops the timer.
static void computeGlobalTimerStatistics(stat_map_type &statData, std::vector< std::string > &statNames, Ptr< const Comm< int > > comm, const ECounterSetOp setOp=Intersection, const std::string &filter="")
Compute global timer statistics for all timers on the given communicator.
static Teuchos::RCP< Teuchos::StackedTimer > getStackedTimer()
The StackedTimer used by the TimeMonitor.
static void enableTimer(const std::string &name)
Enable the timer with the given name.
static void zeroOutTimers()
Reset all global timers to zero.
static void setStackedTimer(const Teuchos::RCP< Teuchos::StackedTimer > &t)
Sets the StackedTimer into which the TimeMonitor will insert timings.
static void report(Ptr< const Comm< int > > comm, std::ostream &out, const std::string &filter, const RCP< ParameterList > &params=null)
Report timer statistics to the given output stream.
static void summarize(Ptr< const Comm< int > > comm, std::ostream &out=std::cout, const bool alwaysWriteLocal=false, const bool writeGlobalStats=true, const bool writeZeroTimers=true, const ECounterSetOp setOp=Intersection, const std::string &filter="", const bool ignoreZeroTimers=false)
Print summary statistics for all timers on the given communicator.
static Teuchos::RCP< Teuchos::StackedTimer > stackedTimer_
Stacked timer for optional injection of timing from TimeMonitor-enabled objects.
static RCP< const ParameterList > getValidReportParameters()
Default parameters (with validators) for report().
static void disableTimer(const std::string &name)
Disable the timer with the given name.
TimeMonitor()=delete
Default constructor is deleted, since it would be unsafe.
Wall-clock timer.
Base interface class for user-defined reduction operations for objects that use value semantics.
std::ostream subclass that performs the magic of indenting data sent to an std::ostream object among ...
Tabbing class for helping to create formated, indented output for a basic_FancyOStream object.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
bool nonnull(const std::shared_ptr< T > &p)
Returns true if p.get()!=NULL.
basic_FancyOStream< char > FancyOStream
basic_OSTab< char > OSTab
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...
ECounterSetOp
Set operation type for mergeCounterNames() to perform.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
void mergeCounterNames(const Comm< int > &comm, const Array< std::string > &localNames, Array< std::string > &globalNames, const ECounterSetOp setOp)
Merge counter names over all processors.
std::map< std::string, std::vector< std::pair< double, double > > > stat_map_type
Global statistics collected from timer data.
This structure defines some basic traits for a scalar field type.
static T zero()
Returns representation of zero for this scalar type.