Teuchos - Trilinos Tools Package Version of the Day
Loading...
Searching...
No Matches
Teuchos_StackedTimer.hpp
1// @HEADER
2// *****************************************************************************
3// Teuchos: Common Tools Package
4//
5// Copyright 2004 NTESS and the Teuchos contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TEUCHOS_STACKED_TIMER_HPP
11#define TEUCHOS_STACKED_TIMER_HPP
12
14#include "Teuchos_Comm.hpp"
15#include "Teuchos_DefaultComm.hpp"
16#include "Teuchos_CommHelpers.hpp"
17#include "Teuchos_RCP.hpp"
18#include "Teuchos_Array.hpp"
20#include "Teuchos_Behavior.hpp"
21#ifdef HAVE_TEUCHOSCORE_KOKKOS
22#include "Kokkos_Core.hpp"
23#endif
24#include <string>
25#include <vector>
26#include <cassert>
27#include <chrono>
28#include <climits>
29#include <cstdlib> // for std::getenv and atoi
30#include <ctime> // for timestamp support
31#include <iostream>
32
33#if defined(HAVE_TEUCHOS_KOKKOS_PROFILING) && defined(HAVE_TEUCHOSCORE_KOKKOS)
34namespace Kokkos {
35namespace Tools {
36extern void pushRegion (const std::string&);
37extern void popRegion ();
38} // namespace Profiling
39} // namespace Kokkos
40#endif
41
42
43namespace Teuchos {
44
46TEUCHOSCOMM_LIB_DLL_EXPORT void error_out(const std::string& msg, const bool fail_all = false);
47
57class BaseTimer {
58
59public:
60
61 using Clock = std::chrono::high_resolution_clock;
62
63 BaseTimer() : accumulation_(0.0), accumulationSquared_(0.0), count_started_(0), count_updates_(0), running_(false) {}
64
66 void start(){
67 if (running_)
68 error_out("Base_Timer:start Failed timer already running");
69 start_time_ = Clock::now();
70
71 count_started_++;
72 running_ = true;
73 }
74
76 void stop(){
77 if (!running_)
78 error_out("Base_Timer:stop Failed timer not running");
79 auto elapsed = std::chrono::duration_cast<std::chrono::duration<double>>(Clock::now() - start_time_).count();
80 accumulation_ += elapsed;
81 accumulationSquared_ += elapsed*elapsed;
82 running_ = false;
83 }
84
86 unsigned long long incrementUpdates(unsigned long long count=1) {count_updates_ += count; return count_updates_;}
87
89 double accumulatedTime() const {return accumulation_;}
90
92 void setAccumulatedTime(double accum = 0) { accumulation_ = accum; }
93
95 void setAccumulatedTimeSquared(double accumSq=0) {accumulationSquared_=accumSq;}
96
double accumulatedTimePerUpdate() const {
106 if (count_updates_ > 0) {
107 return accumulation_/count_updates_;
108 } else {
109 return 0;
110 }
111 }
112
113
122 if (count_started_> 0) {
123 return accumulation_/count_started_;
124 } else {
125 return 0;
126 }
127 }
128
136 double timePerCallStdDev() const {
137 if (count_started_ > 0) {
139 return sqrt(std::max<double>(accumulationSquared_ / count_started_ - mean*mean, 0.0));
140 } else {
141 return 0;
142 }
143 }
144
151 double difference(const BaseTimer &from) const {
152 return accumulation_ - from.accumulation_;
153 }
154
156 void reset() {
157 if (running_)
158 error_out("BaseTimer, cannot reset a running timer");
159 accumulation_ = 0.0;
160 count_started_ = count_updates_ = 0;
161 }
162
164 bool running() const { return running_;}
165
167 unsigned long numCalls() const { return count_started_; }
168
170 unsigned long long numUpdates() const { return count_updates_; }
171
174 { count_started_ = num_calls; }
175
177 void overrideNumUpdatesForUnitTesting(const unsigned long long num_updates)
178 { count_updates_ = num_updates; }
179
180 struct TimeInfo {
181 TimeInfo():time(0.0), stdDev(0.0), count(0), updates(0), running(false){}
182 TimeInfo(BaseTimer* t): time(t->accumulation_), stdDev(t->timePerCallStdDev()), count(t->count_started_), updates(t->count_updates_), running(t->running()) {}
183 double time;
184 double stdDev;
185 unsigned long count;
186 unsigned long long updates;
187 bool running;
188 };
189
190protected:
191 double accumulation_; // total time
192 double accumulationSquared_; // Sum of squares of elapsed times
193 unsigned long count_started_; // Number of times this timer has been started
194 unsigned long long count_updates_; // Total count of items updated during this timer
195 Clock::time_point start_time_;
196 bool running_;
197
198 friend struct TimeInfo;
199};
200
218class TEUCHOSCOMM_LIB_DLL_EXPORT StackedTimer
219{
220protected:
221
230 class LevelTimer : public BaseTimer {
231 protected:
232
233 // TODO: implement operator=
234
235 unsigned level_;
236 std::string name_;
237 LevelTimer *parent_;
238 std::vector<LevelTimer> sub_timers_;
239 public:
241 LevelTimer();
242
250 LevelTimer(int level,
251 const char* name = "RootTimer",
252 LevelTimer *parent=nullptr,
253 bool start_timer=true) :
254 BaseTimer(),
255 level_(level),
256 name_(name),
257 parent_(parent)
258 {
259 if ( start_timer )
260 BaseTimer::start();
261
262 }
263
266 BaseTimer(src), level_(src.level_), name_(src.name_),parent_(src.parent_), sub_timers_(src.sub_timers_)
267 {
268 for (unsigned i=0;i<sub_timers_.size();++i)
269 sub_timers_[i].parent_ = this;
270 }
271
277 LevelTimer* start(const char* sub_name) {
278 for (unsigned i=0;i<sub_timers_.size();i++ )
279 if (sub_name == sub_timers_[i].name_ ) {
280 sub_timers_[i].BaseTimer::start();
281 return &sub_timers_[i];
282 }
283 sub_timers_.push_back(LevelTimer(level_+1,sub_name,this,true));
284 return &sub_timers_[sub_timers_.size()-1];
285 }
286
294 LevelTimer* stop(const std::string &name = "RootTimer") {
295 if (name != name_)
296 error_out("Stopping timer "+name+" But top level running timer is "+name_);
297 BaseTimer::stop();
298 return parent_;
299 }
300
301
306 std::string get_full_name() const {
307 std::string parent_name("");
308 if ((parent_ != nullptr))
309 parent_name = parent_->get_full_name() + "@";
310
311 std::string my_name(name_);
312
313 std::string full_name = parent_name + my_name;
314 return full_name;
315 }
316
317 std::string get_name() const {
318 std::string my_name(name_);
319 return my_name;
320 }
321
328 int count=1;
329 for (unsigned i=0;i<sub_timers_.size(); ++i)
330 count += sub_timers_[i].countTimers();
331 return count;
332 }
333
334 void addTimerNames(Array<std::string> &names, unsigned &pos) {
335 names[pos++] = get_full_name();
336 for (unsigned i=0;i<sub_timers_.size(); ++i)
337 sub_timers_[i].addTimerNames(names, pos);
338 }
339
345 double accumulatedTime(const std::string &locate_name="") {
346
347 if (locate_name == "")
348 return BaseTimer::accumulatedTime();
349
350 std::string first_name,second_name;
351
352 size_t i = locate_name.find_first_of('@');
353 if ( i >= locate_name.size() ) {
355 second_name = "";
356 } else {
357 first_name.assign(locate_name,0,i);
358 second_name.assign(locate_name,i+1,locate_name.size()-i-1);
359 }
360 for (unsigned j=0;j<sub_timers_.size();++j)
361 if ( first_name == sub_timers_[j].name_)
362 return sub_timers_[j].accumulatedTime(second_name);
363 return 0;
364 }
365
367 unsigned level() const
368 {return level_;}
369
370 protected:
377 void splitString(const std::string &locate_name, std::string &first_name, std::string &second_name) {
378 size_t i = locate_name.find_first_of('@');
379 if ( i >= locate_name.size() ) {
381 second_name = "";
382 } else {
383 first_name.assign(locate_name,0,i);
384 second_name.assign(locate_name,i+1,locate_name.size()-i-1);
385 }
386 }
387
388 public:
394 double accumulatedTimePerUpdate(const std::string &locate_name="") {
395
396 if (locate_name == "")
397 return BaseTimer::accumulatedTimePerUpdate();
398
399 std::string first_name,second_name;
400 splitString(locate_name, first_name, second_name);
401
402 for (unsigned j=0;j<sub_timers_.size();j++)
403 if ( first_name == sub_timers_[j].name_)
404 return sub_timers_[j].accumulatedTimePerUpdate(second_name);
405 return 0;
406 }
407
413 double accumulatedTimePerTimerCall(const std::string &locate_name="") {
414
415 if (locate_name == "")
416 return BaseTimer::accumulatedTimePerTimerCall();
417
418 std::string first_name,second_name;
419 splitString(locate_name, first_name, second_name);
420
421 for (unsigned j=0;j<sub_timers_.size();j++)
422 if ( first_name == sub_timers_[j].name_)
423 return sub_timers_[j].accumulatedTimePerTimerCall(second_name);
424 return 0;
425 }
426
430 void pack();
431
438
443 void report(std::ostream &os);
444
450 const BaseTimer* findBaseTimer(const std::string &name) const;
451
458 BaseTimer::TimeInfo findTimer(const std::string &name,bool& found);
459
460 }; // LevelTimer
461
462
463public:
469 explicit StackedTimer(const char *name, const bool start_base_timer = true)
470 : timer_(0,name,nullptr,false),
471 global_mpi_aggregation_called_(false),
472 enable_verbose_(false),
473 verbose_timestamp_levels_(0), // 0 disables
474 verbose_ostream_(Teuchos::rcpFromRef(std::cout)),
475 enable_timers_(true)
476 {
477 top_ = &timer_;
479 this->startBaseTimer();
480
481 auto check_verbose = std::getenv("TEUCHOS_ENABLE_VERBOSE_TIMERS");
482 if (check_verbose != nullptr)
483 enable_verbose_ = true;
484
485 auto check_timestamp = std::getenv("TEUCHOS_ENABLE_VERBOSE_TIMESTAMP_LEVELS");
486 if (check_timestamp != nullptr) {
487 verbose_timestamp_levels_ = std::atoi(check_timestamp);
488 }
489 }
490
491 std::string name() {
492 return timer_.get_full_name();
493 }
494
499#ifdef HAVE_TEUCHOSCORE_KOKKOS
500 // Fence before starting timer to ignore async kernels started before this timer starts
501 if (Behavior::fenceTimers()) {
502 Kokkos::fence("timer_fence_begin_"+timer_.get_name());
503 }
504#endif
505 timer_.BaseTimer::start();
506#if defined(HAVE_TEUCHOS_KOKKOS_PROFILING) && defined(HAVE_TEUCHOSCORE_KOKKOS)
508 ::Kokkos::Tools::pushRegion(timer_.get_name());
509 }
510#endif
511 }
512
517#ifdef HAVE_TEUCHOSCORE_KOKKOS
518 // Fence before stopping the timer to include async kokkos kernels launched within this timer
519 if (Behavior::fenceTimers()) {
520 Kokkos::fence("timer_fence_end_"+timer_.get_name());
521 }
522#endif
523#if defined(HAVE_TEUCHOS_KOKKOS_PROFILING) && defined(HAVE_TEUCHOSCORE_KOKKOS)
525 ::Kokkos::Tools::popRegion();
526 }
527#endif
528 timer_.BaseTimer::stop();
529 }
530
536 void start(const std::string name,
537 const bool push_kokkos_profiling_region = true) {
538 if (enable_timers_) {
539 if (top_ == nullptr) {
540 top_ = timer_.start(name.c_str());
541 } else {
542#ifdef HAVE_TEUCHOSCORE_KOKKOS
543 // Fence before starting timer to ignore async kernels started before this timer starts
544 if (Behavior::fenceTimers()) {
545 Kokkos::fence("timer_fence_begin_"+name);
546 }
547#endif
548 top_ = top_->start(name.c_str());
549#if defined(HAVE_TEUCHOS_KOKKOS_PROFILING) && defined(HAVE_TEUCHOSCORE_KOKKOS)
551 ::Kokkos::Tools::pushRegion(name);
552 }
553#endif
554 }
555 }
556 if (enable_verbose_) {
557 if (!verbose_timestamp_levels_) {
558 *verbose_ostream_ << "STARTING: " << name << std::endl;
559 }
560 // gcc 4.X is incomplete in c++11 standard - missing
561 // std::put_time. We'll disable this feature for gcc 4.
562#if !defined(__GNUC__) || ( defined(__GNUC__) && (__GNUC__ > 4) )
563 else if (top_ != nullptr) {
564 if ( top_->level() <= verbose_timestamp_levels_) {
565 auto now = std::chrono::system_clock::now();
566 auto now_time = std::chrono::system_clock::to_time_t(now);
567 auto gmt = gmtime(&now_time);
568 auto timestamp = std::put_time(gmt, "%Y-%m-%d %H:%M:%S");
569 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()) % 1000;
570 *verbose_ostream_ << "STARTING: " << name << " LEVEL: " << top_->level() << " COUNT: " << timer_.numCalls() << " TIMESTAMP: " << timestamp << "." << ms.count() << std::endl;
571 }
572 }
573#endif
574 }
575 }
576
582 void stop(const std::string &name,
583 const bool pop_kokkos_profiling_region = true) {
584 if (enable_timers_) {
585 if (top_) {
586#ifdef HAVE_TEUCHOSCORE_KOKKOS
587 // Fence before stopping the timer to include async kokkos kernels launched within this timer
588 if (Behavior::fenceTimers()) {
589 Kokkos::fence("timer_fence_end_"+name);
590 }
591#endif
592#if defined(HAVE_TEUCHOS_KOKKOS_PROFILING) && defined(HAVE_TEUCHOSCORE_KOKKOS)
594 ::Kokkos::Tools::popRegion();
595 }
596#endif
597 top_ = top_->stop(name);
598 } else {
599 timer_.BaseTimer::stop();
600 }
601 }
602 if (enable_verbose_) {
603 if (!verbose_timestamp_levels_) {
604 *verbose_ostream_ << "STOPPING: " << name << std::endl;
605 }
606 // gcc 4.X is incomplete in c++11 standard - missing
607 // std::put_time. We'll disable this feature for gcc 4.
608#if !defined(__GNUC__) || ( defined(__GNUC__) && (__GNUC__ > 4) )
609 // The stop adjusts the top level, need to adjust by +1 for printing
610 else if (top_ != nullptr) {
611 if ( (top_->level()+1) <= verbose_timestamp_levels_) {
612 auto now = std::chrono::system_clock::now();
613 auto now_time = std::chrono::system_clock::to_time_t(now);
614 auto gmt = gmtime(&now_time);
615 auto timestamp = std::put_time(gmt, "%Y-%m-%d %H:%M:%S");
616 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()) % 1000;
617 *verbose_ostream_ << "STOPPING: " << name << " LEVEL: " << top_->level()+1 << " COUNT: " << timer_.numCalls() << " TIMESTAMP: " << timestamp << "." << ms.count() << std::endl;
618 }
619 }
620#endif
621 }
622 }
623
628 void incrementUpdates(const long long i = 1) {
629 top_->incrementUpdates(i);
630 }
631
637 double accumulatedTime(const std::string &name="") {
638 if (top_) // Top is null for the head node when nothing is running
639 return top_->accumulatedTime(name);
640 else
641 return timer_.accumulatedTime(name);
642 }
643
649 double accumulatedTimePerUpdate(const std::string &name="") {
650 if (top_) // Top is null for the head node when nothing is running
651 return top_->accumulatedTimePerUpdate(name);
652 else
653 return timer_.accumulatedTimePerUpdate(name);
654 }
660 double accumulatedTimePerTimerCall(const std::string &name="") {
661 if (top_) // Top is null for the head node when nothing is running
662 return top_->accumulatedTimePerTimerCall(name);
663 else
664 return timer_.accumulatedTimePerTimerCall(name);
665 }
666
672 const BaseTimer* findBaseTimer(const std::string &name) const {
673 const BaseTimer* baseTimer = timer_.findBaseTimer(name);
674 TEUCHOS_TEST_FOR_EXCEPTION(baseTimer == nullptr, std::runtime_error,
675 "StackedTimer::findBaseTimer() failed to find a timer named \"" << name << "\"!\n");
676 return baseTimer;
677 }
678
684 BaseTimer::TimeInfo findTimer(const std::string &name) {
685 bool foundTimer = false;
686 const auto timeInfo = timer_.findTimer(name,foundTimer);
687 TEUCHOS_TEST_FOR_EXCEPTION(!foundTimer, std::runtime_error,
688 "StackedTimer::findTimer() failed to find a timer named \"" << name << "\"!\n");
689 return timeInfo;
690 }
691
692 void report(std::ostream &os) {
693 timer_.report(os);
694 }
695
711 OutputOptions() : output_fraction(false), output_total_updates(false), output_histogram(false),
712 output_minmax(false), output_proc_minmax(false), num_histogram(10), max_levels(INT_MAX),
713 print_warnings(true), align_columns(false), print_names_before_values(true),
714 drop_time(-1.0), output_per_proc_stddev(false) {}
715 bool output_fraction;
716 bool output_total_updates;
717 bool output_histogram;
718 bool output_minmax;
719 bool output_proc_minmax;
720 int num_histogram;
721 int max_levels;
722 bool print_warnings;
723 bool align_columns;
724 bool print_names_before_values;
725 double drop_time;
726 bool output_per_proc_stddev;
727 };
728
735 void report(std::ostream &os, Teuchos::RCP<const Teuchos::Comm<int> > comm, OutputOptions options = OutputOptions());
736
746 void reportXML(std::ostream &os, const std::string& datestamp, const std::string& timestamp, Teuchos::RCP<const Teuchos::Comm<int> > comm);
747
779 std::string reportWatchrXML(const std::string& name, Teuchos::RCP<const Teuchos::Comm<int> > comm);
780
782 void enableVerbose(const bool enable_verbose);
783
785 void enableVerboseTimestamps(const unsigned levels);
786
788 void setVerboseOstream(const Teuchos::RCP<std::ostream>& os);
789
792 void disableTimers();
793
796 void enableTimers();
797
803 void aggregateMpiData(Teuchos::RCP<const Teuchos::Comm<int> > comm, OutputOptions options = OutputOptions());
804
813 double getMpiAverageTime(const std::string& flat_timer_name);
814
823 double getMpiAverageCount(const std::string& flat_timer_name);
824
833 bool isTimer(const std::string& flat_timer_name);
834
835protected:
840
841 // Global MPI aggregation arrays
842 Array<std::string> flat_names_;
843 Array<double> min_;
844 Array<double> max_;
845 Array<int> procmin_;
846 Array<int> procmax_;
847 Array<double> sum_;
848 Array<double> sum_sq_;
849 Array<Array<int>> hist_;
850 Array<double> per_proc_stddev_min_;
851 Array<double> per_proc_stddev_max_;
854 Array<int> active_;
855 bool global_mpi_aggregation_called_;
856
859 std::string::size_type timer_names_;
860 std::string::size_type average_time_;
861 std::string::size_type fraction_;
862 std::string::size_type count_;
863 std::string::size_type total_updates_;
864 std::string::size_type min_;
865 std::string::size_type max_;
866 std::string::size_type procmin_;
867 std::string::size_type procmax_;
868 std::string::size_type stddev_;
869 std::string::size_type histogram_;
871 timer_names_(0),
872 average_time_(0),
873 fraction_(0),
874 count_(0),
875 total_updates_(0),
876 min_(0),
877 max_(0),
878 procmax_(0),
879 stddev_(0),
880 histogram_(0){}
881 } alignments_;
882
885
888
891
894
898 void flatten();
899
904 void merge(Teuchos::RCP<const Teuchos::Comm<int> > comm);
905
909 void collectRemoteData(Teuchos::RCP<const Teuchos::Comm<int> > comm, const OutputOptions &options );
910
914 int getFlatNameIndex(const std::string& flat_timer_name);
915
922 double computeColumnWidthsForAligment(std::string prefix, int print_level,
923 std::vector<bool> &printed, double parent_time,
924 const OutputOptions &options);
925
929 double printLevel(std::string prefix, int level, std::ostream &os, std::vector<bool> &printed,
930 double parent_time, const OutputOptions &options);
931
936 double printLevelXML(std::string prefix, int level, std::ostream &os, std::vector<bool> &printed, double parent_time, const std::string& rootName = "");
937
938}; //StackedTimer
939
940
941} //namespace Teuchos
942
943#endif /* TEUCHOS_STACKED_TIMER_HPP */
Templated array class derived from the STL std::vector.
Teuchos header file which uses auto-configuration information to include necessary C++ headers.
Common capabilities for collecting and reporting performance data collectively across MPI processes.
Reference-counted pointer class and non-member templated function implementations.
The basic timer used internally, uses std::chrono::high_resolution_clock.
void setAccumulatedTime(double accum=0)
Setter for accumulated time.
double difference(const BaseTimer &from) const
Return the difference between two timers in seconds,.
void reset()
Reset all the timer stats, throws if it is already running.
void start()
Start a currently stopped timer.
double accumulatedTimePerTimerCall() const
return the average time per timer start/stop
double accumulatedTimePerUpdate() const
return the average time per item updated
void stop()
Stop a current running timer and accumulate time difference.
void overrideNumUpdatesForUnitTesting(const unsigned long long num_updates)
Sets the number of counts for this timer. This is only used for unit testing.
double accumulatedTime() const
Get the total accumulated time since last reset or construction when the timer is running.
double timePerCallStdDev() const
return the std dev in time per timer start/stop
void overrideNumCallsForUnitTesting(const unsigned long num_calls)
Sets the number of calls to start() for this timer. This is only used for unit testing.
unsigned long long numUpdates() const
Returns the number of updates added to this timer.
void setAccumulatedTimeSquared(double accumSq=0)
Setter for squared accumulated time.
unsigned long numCalls() const
Returns the number of calls to start().
unsigned long long incrementUpdates(unsigned long long count=1)
Increment the total number of items updated between a start stop.
bool running() const
Returns true if the timer is currently accumulating time.
Smart reference counting pointer class for automatic garbage collection.
Timer info at a given level and all the children.
double accumulatedTime(const std::string &locate_name="")
LevelTimer * start(const char *sub_name)
void splitString(const std::string &locate_name, std::string &first_name, std::string &second_name)
split a string into two parts split by a '@' if no '@' first gets the full string
double accumulatedTimePerTimerCall(const std::string &locate_name="")
LevelTimer(const LevelTimer &src)
Copy constructor.
LevelTimer * stop(const std::string &name="RootTimer")
unsigned level() const
Returns the level of the timer in the stack.
double accumulatedTimePerUpdate(const std::string &locate_name="")
LevelTimer * unpack(unsigned from)
LevelTimer(int level, const char *name="RootTimer", LevelTimer *parent=nullptr, bool start_timer=true)
This class allows one to push and pop timers on and off a stack.
LevelTimer * top_
Current level running.
void start(const std::string name, const bool push_kokkos_profiling_region=true)
BaseTimer::TimeInfo findTimer(const std::string &name)
void stop(const std::string &name, const bool pop_kokkos_profiling_region=true)
void startBaseTimer(const bool push_kokkos_profiling_region=true)
void stopBaseTimer(const bool pop_kokkos_profiling_region=true)
const BaseTimer * findBaseTimer(const std::string &name) const
Teuchos::RCP< std::ostream > verbose_ostream_
For debugging, this is the ostream used for printing.
double accumulatedTimePerUpdate(const std::string &name="")
void incrementUpdates(const long long i=1)
bool enable_timers_
Used to disable timers for asynchronous work.
unsigned verbose_timestamp_levels_
If set to a value greater than 0, verbose mode will print that many levels of timers with timestamps....
double accumulatedTime(const std::string &name="")
LevelTimer timer_
Base timer.
bool enable_verbose_
If set to true, prints to the debug ostream. At construction, default value is set from environment v...
double accumulatedTimePerTimerCall(const std::string &name="")
StackedTimer(const char *name, const bool start_base_timer=true)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...
void error_out(const std::string &msg, const bool)
Error reporting function for stacked timer.
Stores the column widths for output alignment.