Teuchos - Trilinos Tools Package Version of the Day
Loading...
Searching...
No Matches
Teuchos_StackedTimer.hpp
1// @HEADER
2// *****************************************************************************
3// Teuchos: Common Tools Package
4//
5// Copyright 2004 NTESS and the Teuchos contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TEUCHOS_STACKED_TIMER_HPP
11#define TEUCHOS_STACKED_TIMER_HPP
12
14#include "Teuchos_Comm.hpp"
15#include "Teuchos_DefaultComm.hpp"
16#include "Teuchos_CommHelpers.hpp"
17#include "Teuchos_RCP.hpp"
18#include "Teuchos_Array.hpp"
20#include "Teuchos_Behavior.hpp"
21#ifdef HAVE_TEUCHOSCORE_KOKKOS
22#include "Kokkos_Core.hpp"
23#endif
24#include <string>
25#include <vector>
26#include <cassert>
27#include <chrono>
28#include <climits>
29#include <cstdlib> // for std::getenv and atoi
30#include <ctime> // for timestamp support
31#include <iostream>
32
33#if defined(HAVE_TEUCHOS_KOKKOS_PROFILING) && defined(HAVE_TEUCHOSCORE_KOKKOS)
34namespace Kokkos {
35namespace Tools {
36extern void pushRegion (const std::string&);
37extern void popRegion ();
38} // namespace Profiling
39} // namespace Kokkos
40#endif
41
42
43namespace Teuchos {
44
46TEUCHOSCOMM_LIB_DLL_EXPORT void error_out(const std::string& msg, const bool fail_all = false);
47
57class BaseTimer {
58
59public:
60
61 using Clock = std::chrono::high_resolution_clock;
62
63 BaseTimer() : accumulation_(0.0), accumulationSquared_(0.0), count_started_(0), count_updates_(0), running_(false) {}
64
66 void start(){
67 if (running_)
68 error_out("Base_Timer:start Failed timer already running");
69 start_time_ = Clock::now();
70
71 count_started_++;
72 running_ = true;
73 }
74
76 void stop(){
77 if (!running_)
78 error_out("Base_Timer:stop Failed timer not running");
79 auto elapsed = std::chrono::duration_cast<std::chrono::duration<double>>(Clock::now() - start_time_).count();
80 accumulation_ += elapsed;
81 accumulationSquared_ += elapsed*elapsed;
82 running_ = false;
83 }
84
86 unsigned long long incrementUpdates(unsigned long long count=1) {count_updates_ += count; return count_updates_;}
87
89 double accumulatedTime() const {return accumulation_;}
90
92 void setAccumulatedTime(double accum = 0) { accumulation_ = accum; }
93
95 void setAccumulatedTimeSquared(double accumSq=0) {accumulationSquared_=accumSq;}
96
double accumulatedTimePerUpdate() const {
106 if (count_updates_ > 0) {
107 return accumulation_/count_updates_;
108 } else {
109 return 0;
110 }
111 }
112
113
122 if (count_started_> 0) {
123 return accumulation_/count_started_;
124 } else {
125 return 0;
126 }
127 }
128
136 double timePerCallStdDev() const {
137 if (count_started_ > 0) {
139 return sqrt(std::max<double>(accumulationSquared_ / count_started_ - mean*mean, 0.0));
140 } else {
141 return 0;
142 }
143 }
144
151 double difference(const BaseTimer &from) const {
152 return accumulation_ - from.accumulation_;
153 }
154
156 void reset() {
157 if (running_)
158 error_out("BaseTimer, cannot reset a running timer");
159 accumulation_ = 0.0;
160 count_started_ = count_updates_ = 0;
161 }
162
164 bool running() const { return running_;}
165
167 unsigned long numCalls() const { return count_started_; }
168
170 unsigned long long numUpdates() const { return count_updates_; }
171
174 { count_started_ = num_calls; }
175
177 void overrideNumUpdatesForUnitTesting(const unsigned long long num_updates)
178 { count_updates_ = num_updates; }
179
180 struct TimeInfo {
181 TimeInfo():time(0.0), stdDev(0.0), count(0), updates(0), running(false){}
182 TimeInfo(BaseTimer* t): time(t->accumulation_), stdDev(t->timePerCallStdDev()), count(t->count_started_), updates(t->count_updates_), running(t->running()) {}
183 double time;
184 double stdDev;
185 unsigned long count;
186 unsigned long long updates;
187 bool running;
188 };
189
190protected:
191 double accumulation_; // total time
192 double accumulationSquared_; // Sum of squares of elapsed times
193 unsigned long count_started_; // Number of times this timer has been started
194 unsigned long long count_updates_; // Total count of items updated during this timer
195 Clock::time_point start_time_;
196 bool running_;
197
198 friend struct TimeInfo;
199};
200
218class TEUCHOSCOMM_LIB_DLL_EXPORT StackedTimer
219{
220protected:
221
230 class LevelTimer : public BaseTimer {
231 protected:
232
233 // TODO: implement operator=
234
235 unsigned level_;
236 std::string name_;
237 LevelTimer *parent_;
238 std::vector<LevelTimer> sub_timers_;
239 public:
241 LevelTimer();
242
250 LevelTimer(int level,
251 const char* name = "RootTimer",
252 LevelTimer *parent=nullptr,
253 bool start_timer=true) :
254 BaseTimer(),
255 level_(level),
256 name_(name),
257 parent_(parent)
258 {
259 if ( start_timer )
260 BaseTimer::start();
261
262 }
263
266 BaseTimer(src), level_(src.level_), name_(src.name_),parent_(src.parent_), sub_timers_(src.sub_timers_)
267 {
268 for (unsigned i=0;i<sub_timers_.size();++i)
269 sub_timers_[i].parent_ = this;
270 }
271
277 LevelTimer* start(const char* sub_name) {
278 for (unsigned i=0;i<sub_timers_.size();i++ )
279 if (sub_name == sub_timers_[i].name_ ) {
280 sub_timers_[i].BaseTimer::start();
281 return &sub_timers_[i];
282 }
283 sub_timers_.push_back(LevelTimer(level_+1,sub_name,this,true));
284 return &sub_timers_[sub_timers_.size()-1];
285 }
286
294 LevelTimer* stop(const std::string &name = "RootTimer") {
295 if (name != name_)
296 error_out("Stopping timer "+name+" But top level running timer is "+name_);
297 BaseTimer::stop();
298 return parent_;
299 }
300
301
306 std::string get_full_name() const {
307 std::string parent_name("");
308 if ((parent_ != nullptr))
309 parent_name = parent_->get_full_name() + "@";
310
311 std::string my_name(name_);
312
313 std::string full_name = parent_name + my_name;
314 return full_name;
315 }
316
317 std::string get_name() const {
318 std::string my_name(name_);
319 return my_name;
320 }
321
328 int count=1;
329 for (unsigned i=0;i<sub_timers_.size(); ++i)
330 count += sub_timers_[i].countTimers();
331 return count;
332 }
333
334 void addTimerNames(Array<std::string> &names, unsigned &pos) {
335 names[pos++] = get_full_name();
336 for (unsigned i=0;i<sub_timers_.size(); ++i)
337 sub_timers_[i].addTimerNames(names, pos);
338 }
339
345 double accumulatedTime(const std::string &locate_name="") {
346
347 if (locate_name == "")
348 return BaseTimer::accumulatedTime();
349
350 std::string first_name,second_name;
351
352 size_t i = locate_name.find_first_of('@');
353 if ( i >= locate_name.size() ) {
355 second_name = "";
356 } else {
357 first_name.assign(locate_name,0,i);
358 second_name.assign(locate_name,i+1,locate_name.size()-i-1);
359 }
360 for (unsigned j=0;j<sub_timers_.size();++j)
361 if ( first_name == sub_timers_[j].name_)
362 return sub_timers_[j].accumulatedTime(second_name);
363 return 0;
364 }
365
367 unsigned level() const
368 {return level_;}
369
370 protected:
377 void splitString(const std::string &locate_name, std::string &first_name, std::string &second_name) {
378 size_t i = locate_name.find_first_of('@');
379 if ( i >= locate_name.size() ) {
381 second_name = "";
382 } else {
383 first_name.assign(locate_name,0,i);
384 second_name.assign(locate_name,i+1,locate_name.size()-i-1);
385 }
386 }
387
388 public:
394 double accumulatedTimePerUpdate(const std::string &locate_name="") {
395
396 if (locate_name == "")
397 return BaseTimer::accumulatedTimePerUpdate();
398
399 std::string first_name,second_name;
400 splitString(locate_name, first_name, second_name);
401
402 for (unsigned j=0;j<sub_timers_.size();j++)
403 if ( first_name == sub_timers_[j].name_)
404 return sub_timers_[j].accumulatedTimePerUpdate(second_name);
405 return 0;
406 }
407
413 double accumulatedTimePerTimerCall(const std::string &locate_name="") {
414
415 if (locate_name == "")
416 return BaseTimer::accumulatedTimePerTimerCall();
417
418 std::string first_name,second_name;
419 splitString(locate_name, first_name, second_name);
420
421 for (unsigned j=0;j<sub_timers_.size();j++)
422 if ( first_name == sub_timers_[j].name_)
423 return sub_timers_[j].accumulatedTimePerTimerCall(second_name);
424 return 0;
425 }
426
430 void pack();
431
438
443 void report(std::ostream &os);
444
450 const BaseTimer* findBaseTimer(const std::string &name) const;
451
458 BaseTimer::TimeInfo findTimer(const std::string &name,bool& found);
459
460 }; // LevelTimer
461
462
463public:
469 explicit StackedTimer(const char *name, const bool start_base_timer = true)
470 : timer_(0,name,nullptr,false),
471 global_mpi_aggregation_called_(false),
472 enable_verbose_(false),
473 verbose_timestamp_levels_(0), // 0 disables
474 verbose_ostream_(Teuchos::rcpFromRef(std::cout)),
475 enable_timers_(true)
476 {
477 top_ = &timer_;
479 this->startBaseTimer();
480
481 auto check_verbose = std::getenv("TEUCHOS_ENABLE_VERBOSE_TIMERS");
482 if (check_verbose != nullptr)
483 enable_verbose_ = true;
484
485 auto check_timestamp = std::getenv("TEUCHOS_ENABLE_VERBOSE_TIMESTAMP_LEVELS");
486 if (check_timestamp != nullptr) {
487 verbose_timestamp_levels_ = std::atoi(check_timestamp);
488 }
489 }
490
491 std::string name() {
492 return timer_.get_full_name();
493 }
494
499 timer_.BaseTimer::start();
500#ifdef HAVE_TEUCHOSCORE_KOKKOS
501 if (Behavior::fenceTimers()) {
502 Kokkos::fence("timer_fence_begin_"+timer_.get_name());
503 }
504#endif
505#if defined(HAVE_TEUCHOS_KOKKOS_PROFILING) && defined(HAVE_TEUCHOSCORE_KOKKOS)
507 ::Kokkos::Tools::pushRegion(timer_.get_name());
508 }
509#endif
510 }
511
516 timer_.BaseTimer::stop();
517#ifdef HAVE_TEUCHOSCORE_KOKKOS
518 if (Behavior::fenceTimers()) {
519 Kokkos::fence("timer_fence_end_"+timer_.get_name());
520 }
521#endif
522#if defined(HAVE_TEUCHOS_KOKKOS_PROFILING) && defined(HAVE_TEUCHOSCORE_KOKKOS)
524 ::Kokkos::Tools::popRegion();
525 }
526#endif
527 }
528
534 void start(const std::string name,
535 const bool push_kokkos_profiling_region = true) {
536 if (enable_timers_) {
537 if (top_ == nullptr) {
538 top_ = timer_.start(name.c_str());
539 } else {
540 top_ = top_->start(name.c_str());
541#ifdef HAVE_TEUCHOSCORE_KOKKOS
542 if (Behavior::fenceTimers()) {
543 Kokkos::fence("timer_fence_begin_"+name);
544 }
545#endif
546#if defined(HAVE_TEUCHOS_KOKKOS_PROFILING) && defined(HAVE_TEUCHOSCORE_KOKKOS)
548 ::Kokkos::Tools::pushRegion(name);
549 }
550#endif
551 }
552 }
553 if (enable_verbose_) {
554 if (!verbose_timestamp_levels_) {
555 *verbose_ostream_ << "STARTING: " << name << std::endl;
556 }
557 // gcc 4.X is incomplete in c++11 standard - missing
558 // std::put_time. We'll disable this feature for gcc 4.
559#if !defined(__GNUC__) || ( defined(__GNUC__) && (__GNUC__ > 4) )
560 else if (top_ != nullptr) {
561 if ( top_->level() <= verbose_timestamp_levels_) {
562 auto now = std::chrono::system_clock::now();
563 auto now_time = std::chrono::system_clock::to_time_t(now);
564 auto gmt = gmtime(&now_time);
565 auto timestamp = std::put_time(gmt, "%Y-%m-%d %H:%M:%S");
566 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()) % 1000;
567 *verbose_ostream_ << "STARTING: " << name << " LEVEL: " << top_->level() << " COUNT: " << timer_.numCalls() << " TIMESTAMP: " << timestamp << "." << ms.count() << std::endl;
568 }
569 }
570#endif
571 }
572 }
573
579 void stop(const std::string &name,
580 const bool pop_kokkos_profiling_region = true) {
581 if (enable_timers_) {
582 if (top_) {
583 top_ = top_->stop(name);
584#ifdef HAVE_TEUCHOSCORE_KOKKOS
585 if (Behavior::fenceTimers()) {
586 Kokkos::fence("timer_fence_end_"+name);
587 }
588#endif
589#if defined(HAVE_TEUCHOS_KOKKOS_PROFILING) && defined(HAVE_TEUCHOSCORE_KOKKOS)
591 ::Kokkos::Tools::popRegion();
592 }
593#endif
594 } else {
595 timer_.BaseTimer::stop();
596 }
597 }
598 if (enable_verbose_) {
599 if (!verbose_timestamp_levels_) {
600 *verbose_ostream_ << "STOPPING: " << name << std::endl;
601 }
602 // gcc 4.X is incomplete in c++11 standard - missing
603 // std::put_time. We'll disable this feature for gcc 4.
604#if !defined(__GNUC__) || ( defined(__GNUC__) && (__GNUC__ > 4) )
605 // The stop adjusts the top level, need to adjust by +1 for printing
606 else if (top_ != nullptr) {
607 if ( (top_->level()+1) <= verbose_timestamp_levels_) {
608 auto now = std::chrono::system_clock::now();
609 auto now_time = std::chrono::system_clock::to_time_t(now);
610 auto gmt = gmtime(&now_time);
611 auto timestamp = std::put_time(gmt, "%Y-%m-%d %H:%M:%S");
612 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()) % 1000;
613 *verbose_ostream_ << "STOPPING: " << name << " LEVEL: " << top_->level()+1 << " COUNT: " << timer_.numCalls() << " TIMESTAMP: " << timestamp << "." << ms.count() << std::endl;
614 }
615 }
616#endif
617 }
618 }
619
624 void incrementUpdates(const long long i = 1) {
625 top_->incrementUpdates(i);
626 }
627
633 double accumulatedTime(const std::string &name="") {
634 if (top_) // Top is null for the head node when nothing is running
635 return top_->accumulatedTime(name);
636 else
637 return timer_.accumulatedTime(name);
638 }
639
645 double accumulatedTimePerUpdate(const std::string &name="") {
646 if (top_) // Top is null for the head node when nothing is running
647 return top_->accumulatedTimePerUpdate(name);
648 else
649 return timer_.accumulatedTimePerUpdate(name);
650 }
656 double accumulatedTimePerTimerCall(const std::string &name="") {
657 if (top_) // Top is null for the head node when nothing is running
658 return top_->accumulatedTimePerTimerCall(name);
659 else
660 return timer_.accumulatedTimePerTimerCall(name);
661 }
662
668 const BaseTimer* findBaseTimer(const std::string &name) const {
669 const BaseTimer* baseTimer = timer_.findBaseTimer(name);
670 TEUCHOS_TEST_FOR_EXCEPTION(baseTimer == nullptr, std::runtime_error,
671 "StackedTimer::findBaseTimer() failed to find a timer named \"" << name << "\"!\n");
672 return baseTimer;
673 }
674
680 BaseTimer::TimeInfo findTimer(const std::string &name) {
681 bool foundTimer = false;
682 const auto timeInfo = timer_.findTimer(name,foundTimer);
683 TEUCHOS_TEST_FOR_EXCEPTION(!foundTimer, std::runtime_error,
684 "StackedTimer::findTimer() failed to find a timer named \"" << name << "\"!\n");
685 return timeInfo;
686 }
687
688 void report(std::ostream &os) {
689 timer_.report(os);
690 }
691
707 OutputOptions() : output_fraction(false), output_total_updates(false), output_histogram(false),
708 output_minmax(false), output_proc_minmax(false), num_histogram(10), max_levels(INT_MAX),
709 print_warnings(true), align_columns(false), print_names_before_values(true),
710 drop_time(-1.0), output_per_proc_stddev(false) {}
711 bool output_fraction;
712 bool output_total_updates;
713 bool output_histogram;
714 bool output_minmax;
715 bool output_proc_minmax;
716 int num_histogram;
717 int max_levels;
718 bool print_warnings;
719 bool align_columns;
720 bool print_names_before_values;
721 double drop_time;
722 bool output_per_proc_stddev;
723 };
724
731 void report(std::ostream &os, Teuchos::RCP<const Teuchos::Comm<int> > comm, OutputOptions options = OutputOptions());
732
742 void reportXML(std::ostream &os, const std::string& datestamp, const std::string& timestamp, Teuchos::RCP<const Teuchos::Comm<int> > comm);
743
775 std::string reportWatchrXML(const std::string& name, Teuchos::RCP<const Teuchos::Comm<int> > comm);
776
778 void enableVerbose(const bool enable_verbose);
779
781 void enableVerboseTimestamps(const unsigned levels);
782
784 void setVerboseOstream(const Teuchos::RCP<std::ostream>& os);
785
788 void disableTimers();
789
792 void enableTimers();
793
799 void aggregateMpiData(Teuchos::RCP<const Teuchos::Comm<int> > comm, OutputOptions options = OutputOptions());
800
809 double getMpiAverageTime(const std::string& flat_timer_name);
810
819 double getMpiAverageCount(const std::string& flat_timer_name);
820
829 bool isTimer(const std::string& flat_timer_name);
830
831protected:
836
837 // Global MPI aggregation arrays
838 Array<std::string> flat_names_;
839 Array<double> min_;
840 Array<double> max_;
841 Array<int> procmin_;
842 Array<int> procmax_;
843 Array<double> sum_;
844 Array<double> sum_sq_;
845 Array<Array<int>> hist_;
846 Array<double> per_proc_stddev_min_;
847 Array<double> per_proc_stddev_max_;
850 Array<int> active_;
851 bool global_mpi_aggregation_called_;
852
855 std::string::size_type timer_names_;
856 std::string::size_type average_time_;
857 std::string::size_type fraction_;
858 std::string::size_type count_;
859 std::string::size_type total_updates_;
860 std::string::size_type min_;
861 std::string::size_type max_;
862 std::string::size_type procmin_;
863 std::string::size_type procmax_;
864 std::string::size_type stddev_;
865 std::string::size_type histogram_;
867 timer_names_(0),
868 average_time_(0),
869 fraction_(0),
870 count_(0),
871 total_updates_(0),
872 min_(0),
873 max_(0),
874 procmax_(0),
875 stddev_(0),
876 histogram_(0){}
877 } alignments_;
878
881
884
887
890
894 void flatten();
895
900 void merge(Teuchos::RCP<const Teuchos::Comm<int> > comm);
901
905 void collectRemoteData(Teuchos::RCP<const Teuchos::Comm<int> > comm, const OutputOptions &options );
906
910 int getFlatNameIndex(const std::string& flat_timer_name);
911
918 double computeColumnWidthsForAligment(std::string prefix, int print_level,
919 std::vector<bool> &printed, double parent_time,
920 const OutputOptions &options);
921
925 double printLevel(std::string prefix, int level, std::ostream &os, std::vector<bool> &printed,
926 double parent_time, const OutputOptions &options);
927
932 double printLevelXML(std::string prefix, int level, std::ostream &os, std::vector<bool> &printed, double parent_time, const std::string& rootName = "");
933
934}; //StackedTimer
935
936
937} //namespace Teuchos
938
939#endif /* TEUCHOS_STACKED_TIMER_HPP */
Templated array class derived from the STL std::vector.
Teuchos header file which uses auto-configuration information to include necessary C++ headers.
Common capabilities for collecting and reporting performance data collectively across MPI processes.
Reference-counted pointer class and non-member templated function implementations.
The basic timer used internally, uses std::chrono::high_resolution_clock.
void setAccumulatedTime(double accum=0)
Setter for accumulated time.
double difference(const BaseTimer &from) const
Return the difference between two timers in seconds,.
void reset()
Reset all the timer stats, throws if it is already running.
void start()
Start a currently stopped timer.
double accumulatedTimePerTimerCall() const
return the average time per timer start/stop
double accumulatedTimePerUpdate() const
return the average time per item updated
void stop()
Stop a current running timer and accumulate time difference.
void overrideNumUpdatesForUnitTesting(const unsigned long long num_updates)
Sets the number of counts for this timer. This is only used for unit testing.
double accumulatedTime() const
Get the total accumulated time since last reset or construction when the timer is running.
double timePerCallStdDev() const
return the std dev in time per timer start/stop
void overrideNumCallsForUnitTesting(const unsigned long num_calls)
Sets the number of calls to start() for this timer. This is only used for unit testing.
unsigned long long numUpdates() const
Returns the number of updates added to this timer.
void setAccumulatedTimeSquared(double accumSq=0)
Setter for squared accumulated time.
unsigned long numCalls() const
Returns the number of calls to start().
unsigned long long incrementUpdates(unsigned long long count=1)
Increment the total number of items updated between a start stop.
bool running() const
Returns true if the timer is currently accumulating time.
Smart reference counting pointer class for automatic garbage collection.
Timer info at a given level and all the children.
double accumulatedTime(const std::string &locate_name="")
LevelTimer * start(const char *sub_name)
void splitString(const std::string &locate_name, std::string &first_name, std::string &second_name)
split a string into two parts split by a '@' if no '@' first gets the full string
double accumulatedTimePerTimerCall(const std::string &locate_name="")
LevelTimer(const LevelTimer &src)
Copy constructor.
LevelTimer * stop(const std::string &name="RootTimer")
unsigned level() const
Returns the level of the timer in the stack.
double accumulatedTimePerUpdate(const std::string &locate_name="")
LevelTimer * unpack(unsigned from)
LevelTimer(int level, const char *name="RootTimer", LevelTimer *parent=nullptr, bool start_timer=true)
This class allows one to push and pop timers on and off a stack.
LevelTimer * top_
Current level running.
void start(const std::string name, const bool push_kokkos_profiling_region=true)
BaseTimer::TimeInfo findTimer(const std::string &name)
void stop(const std::string &name, const bool pop_kokkos_profiling_region=true)
void startBaseTimer(const bool push_kokkos_profiling_region=true)
void stopBaseTimer(const bool pop_kokkos_profiling_region=true)
const BaseTimer * findBaseTimer(const std::string &name) const
Teuchos::RCP< std::ostream > verbose_ostream_
For debugging, this is the ostream used for printing.
double accumulatedTimePerUpdate(const std::string &name="")
void incrementUpdates(const long long i=1)
bool enable_timers_
Used to disable timers for asynchronous work.
unsigned verbose_timestamp_levels_
If set to a value greater than 0, verbose mode will print that many levels of timers with timestamps....
double accumulatedTime(const std::string &name="")
LevelTimer timer_
Base timer.
bool enable_verbose_
If set to true, prints to the debug ostream. At construction, default value is set from environment v...
double accumulatedTimePerTimerCall(const std::string &name="")
StackedTimer(const char *name, const bool start_base_timer=true)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...
void error_out(const std::string &msg, const bool)
Error reporting function for stacked timer.
Stores the column widths for output alignment.