From 35285bed2f394c43d43ad4491efcd1efa2cb4ca5 Mon Sep 17 00:00:00 2001 From: Maarten Arnst Date: Thu, 25 Jan 2024 15:49:17 +0100 Subject: [PATCH] Add min rel accuracy stopping criterion Clean up the initial commit Further cleaning of initial commit. Add test. Improvements to comments thanks to review Reformat thanks to clang format. Static cast to avoid conversion warning --- include/benchmark/benchmark.h | 26 +++++- src/benchmark.cc | 30 +++++-- src/benchmark_api_internal.cc | 6 ++ src/benchmark_api_internal.h | 2 + src/benchmark_register.cc | 9 ++ src/benchmark_runner.cc | 49 ++++++++-- src/benchmark_runner.h | 10 +++ src/thread_manager.h | 1 + src/thread_timer.h | 11 ++- test/CMakeLists.txt | 3 + test/benchmark_min_rel_accuracy_flag_test.cc | 95 ++++++++++++++++++++ 11 files changed, 222 insertions(+), 20 deletions(-) create mode 100644 test/benchmark_min_rel_accuracy_flag_test.cc diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index 86f9dbbabb..568e2e45ba 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -126,8 +126,12 @@ template int BM_Sequential(benchmark::State& state) { } BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); -Use `Benchmark::MinTime(double t)` to set the minimum time used to run the -benchmark. This option overrides the `benchmark_min_time` flag. +Use `Benchmark::MinTime(double t)` to set the minimum time used to determine how +long to run the benchmark. This option overrides the `benchmark_min_time` flag. + +If a benchmark measures time manually, use `Benchmark::MinRelAccuracy(double r)` +to set the required minimum relative accuracy used to determine how long to run +the benchmark. This option overrides the `benchmark_min_rel_accuracy` flag. void BM_test(benchmark::State& state) { ... body ... @@ -1262,11 +1266,21 @@ class BENCHMARK_EXPORT Benchmark { // multiplier kRangeMultiplier will be used. Benchmark* RangeMultiplier(int multiplier); - // Set the minimum amount of time to use when running this benchmark. This - // option overrides the `benchmark_min_time` flag. + // Set the minimum amount of time to use to determine the required number + // of iterations when running this benchmark. This option overrides + // the `benchmark_min_time` flag. // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark. Benchmark* MinTime(double t); + // Set the minimum relative accuracy to use to determine the required number + // of iterations when running this benchmark. This option overrides + // the `benchmark_min_rel_accuracy` flag. + // REQUIRES: `r > 0`, `Iterations` has not been called on this benchmark, and + // time is measured manually, i.e., `UseManualTime` has been called on this + // benchmark and each benchmark iteration should call + // `SetIterationTime(seconds)` to report the measured time. + Benchmark* MinRelAccuracy(double r); + // Set the minimum amount of time to run the benchmark before taking runtimes // of this benchmark into account. This // option overrides the `benchmark_min_warmup_time` flag. @@ -1389,6 +1403,7 @@ class BENCHMARK_EXPORT Benchmark { int range_multiplier_; double min_time_; + double min_rel_accuracy_; double min_warmup_time_; IterationCount iterations_; int repetitions_; @@ -1821,6 +1836,7 @@ struct BENCHMARK_EXPORT BenchmarkName { std::string function_name; std::string args; std::string min_time; + std::string min_rel_accuracy; std::string min_warmup_time; std::string iterations; std::string repetitions; @@ -1860,6 +1876,7 @@ class BENCHMARK_EXPORT BenchmarkReporter { threads(1), time_unit(GetDefaultTimeUnit()), real_accumulated_time(0), + manual_accumulated_time_pow2(0), cpu_accumulated_time(0), max_heapbytes_used(0), use_real_time_for_initial_big_o(false), @@ -1888,6 +1905,7 @@ class BENCHMARK_EXPORT BenchmarkReporter { int64_t repetitions; TimeUnit time_unit; double real_accumulated_time; + double manual_accumulated_time_pow2; double cpu_accumulated_time; // Return a value representing the real time per iteration in the unit diff --git a/src/benchmark.cc b/src/benchmark.cc index 0ea90aeb6a..4aac339871 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -65,12 +65,12 @@ BM_DEFINE_bool(benchmark_list_tests, false); // linked into the binary are run. BM_DEFINE_string(benchmark_filter, ""); -// Specification of how long to run the benchmark. +// Specification of either an exact number of iterations (specified as +// `x`) or a minimum number of seconds (specified as `s`) used +// to determine how long to run the benchmark. // -// It can be either an exact number of iterations (specified as `x`), -// or a minimum number of seconds (specified as `s`). If the latter -// format (ie., min seconds) is used, the system may run the benchmark longer -// until the results are considered significant. +// If the latter format (ie., min seconds) is used, the system may run +// the benchmark longer until the results are considered significant. // // For backward compatibility, the `s` suffix may be omitted, in which case, // the specified number is interpreted as the number of seconds. @@ -81,6 +81,19 @@ BM_DEFINE_string(benchmark_filter, ""); // benchmark execution, regardless of number of threads. BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr); +// Specification of required relative accuracy used to determine how +// long to run the benchmark. +// +// REQUIRES: time is measured manually. +// +// Manual timers provide per-iteration times. The relative accuracy is +// measured as the standard deviation of these per-iteration times divided by +// the mean and the square root of the number of iterations. The benchmark is +// run until both of the following conditions are fulfilled: +// 1. the specified minimum time or number of iterations is reached +// 2. the measured relative accuracy meets the specified requirement +BM_DEFINE_double(benchmark_min_rel_accuracy, 0.0); + // Minimum number of seconds a benchmark should be run before results should be // taken into account. This e.g can be necessary for benchmarks of code which // needs to fill some form of cache before performance is of interest. @@ -94,7 +107,7 @@ BM_DEFINE_int32(benchmark_repetitions, 1); // If enabled, forces each benchmark to execute exactly one iteration and one // repetition, bypassing any configured -// MinTime()/MinWarmUpTime()/Iterations()/Repetitions() +// MinTime()/MinRelAccuracy()/MinWarmUpTime()/Iterations()/Repetitions() BM_DEFINE_bool(benchmark_dry_run, false); // If set, enable random interleaving of repetitions of all benchmarks. @@ -722,6 +735,8 @@ void ParseCommandLineFlags(int* argc, char** argv) { ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || ParseStringFlag(argv[i], "benchmark_min_time", &FLAGS_benchmark_min_time) || + ParseDoubleFlag(argv[i], "benchmark_min_rel_accuracy", + &FLAGS_benchmark_min_rel_accuracy) || ParseDoubleFlag(argv[i], "benchmark_min_warmup_time", &FLAGS_benchmark_min_warmup_time) || ParseInt32Flag(argv[i], "benchmark_repetitions", @@ -793,7 +808,8 @@ void PrintDefaultHelp() { "benchmark" " [--benchmark_list_tests={true|false}]\n" " [--benchmark_filter=]\n" - " [--benchmark_min_time=`x` OR `s` ]\n" + " [--benchmark_min_time=`x` OR `s`]\n" + " [--benchmark_min_rel_accuracy=]\n" " [--benchmark_min_warmup_time=]\n" " [--benchmark_repetitions=]\n" " [--benchmark_dry_run={true|false}]\n" diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index 4b569d7982..5225751620 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -25,6 +25,7 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx, statistics_(benchmark_.statistics_), repetitions_(benchmark_.repetitions_), min_time_(benchmark_.min_time_), + min_rel_accuracy_(benchmark_.min_rel_accuracy_), min_warmup_time_(benchmark_.min_warmup_time_), iterations_(benchmark_.iterations_), threads_(thread_count) { @@ -51,6 +52,11 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx, name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_); } + if (!IsZero(benchmark->min_rel_accuracy_)) { + name_.min_rel_accuracy = + StrFormat("min_rel_accuracy:%0.3f", benchmark_.min_rel_accuracy_); + } + if (!IsZero(benchmark->min_warmup_time_)) { name_.min_warmup_time = StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_); diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 659a71440e..f4ee61045b 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -36,6 +36,7 @@ class BenchmarkInstance { const std::vector& statistics() const { return statistics_; } int repetitions() const { return repetitions_; } double min_time() const { return min_time_; } + double min_rel_accuracy() const { return min_rel_accuracy_; } double min_warmup_time() const { return min_warmup_time_; } IterationCount iterations() const { return iterations_; } int threads() const { return threads_; } @@ -64,6 +65,7 @@ class BenchmarkInstance { const std::vector& statistics_; int repetitions_; double min_time_; + double min_rel_accuracy_; double min_warmup_time_; IterationCount iterations_; int threads_; // Number of concurrent threads to us diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index 8ade048225..13ba9e554e 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -211,6 +211,7 @@ Benchmark::Benchmark(const std::string& name) use_default_time_unit_(true), range_multiplier_(kRangeMultiplier), min_time_(0), + min_rel_accuracy_(0), min_warmup_time_(0), iterations_(0), repetitions_(0), @@ -356,6 +357,14 @@ Benchmark* Benchmark::MinTime(double t) { return this; } +Benchmark* Benchmark::MinRelAccuracy(double r) { + BM_CHECK(r > 0.0); + BM_CHECK(iterations_ == 0); + BM_CHECK(use_manual_time_); + min_rel_accuracy_ = r; + return this; +} + Benchmark* Benchmark::MinWarmUpTime(double t) { BM_CHECK(t >= 0.0); BM_CHECK(iterations_ == 0); diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 463f69fc52..88909f278f 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -60,6 +60,7 @@ namespace benchmark { BM_DECLARE_bool(benchmark_dry_run); BM_DECLARE_string(benchmark_min_time); +BM_DECLARE_double(benchmark_min_rel_accuracy); BM_DECLARE_double(benchmark_min_warmup_time); BM_DECLARE_int32(benchmark_repetitions); BM_DECLARE_bool(benchmark_report_aggregates_only); @@ -103,6 +104,7 @@ BenchmarkReporter::Run CreateRunReport( if (!report.skipped) { if (b.use_manual_time()) { report.real_accumulated_time = results.manual_time_used; + report.manual_accumulated_time_pow2 = results.manual_time_used_pow2; } else { report.real_accumulated_time = results.real_time_used; } @@ -151,6 +153,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, results.cpu_time_used += timer.cpu_time_used(); results.real_time_used += timer.real_time_used(); results.manual_time_used += timer.manual_time_used(); + results.manual_time_used_pow2 += timer.manual_time_used_pow2(); results.complexity_n += st.complexity_length_n(); internal::Increment(&results.counters, st.counters); } @@ -239,6 +242,11 @@ BenchmarkRunner::BenchmarkRunner( min_time(FLAGS_benchmark_dry_run ? 0 : ComputeMinTime(b_, parsed_benchtime_flag)), + min_rel_accuracy(FLAGS_benchmark_dry_run + ? std::numeric_limits::max() + : (!IsZero(b.min_rel_accuracy()) + ? b.min_rel_accuracy() + : FLAGS_benchmark_min_rel_accuracy)), min_warmup_time( FLAGS_benchmark_dry_run ? 0 @@ -318,8 +326,10 @@ BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() { // Base decisions off of real time if requested by this benchmark. i.seconds = i.results.cpu_time_used; + i.seconds_pow2 = 0; if (b.use_manual_time()) { i.seconds = i.results.manual_time_used; + i.seconds_pow2 = i.results.manual_time_used_pow2; } else if (b.use_real_time()) { i.seconds = i.results.real_time_used; } @@ -340,6 +350,11 @@ IterationCount BenchmarkRunner::PredictNumItersNeeded( const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1; multiplier = is_significant ? multiplier : 10.0; + if (!IsZero(GetMinRelAccuracy())) { + multiplier = + std::max(multiplier, GetRelAccuracy(i) * 1.4 / GetMinRelAccuracy()); + } + // So what seems to be the sufficiently-large iteration count? Round up. const IterationCount max_next_iters = static_cast( std::llround(std::max(multiplier * static_cast(i.iters), @@ -357,14 +372,12 @@ bool BenchmarkRunner::ShouldReportIterationResults( // Either it has run for a sufficient amount of time // or because an error was reported. return i.results.skipped_ || FLAGS_benchmark_dry_run || - i.iters >= kMaxIterations || // Too many iterations already. - i.seconds >= - GetMinTimeToApply() || // The elapsed time is large enough. - // CPU time is specified but the elapsed real time greatly exceeds - // the minimum time. - // Note that user provided timers are except from this test. - ((i.results.real_time_used >= 5 * GetMinTimeToApply()) && - !b.use_manual_time()); + // Too many iterations already. + i.iters >= kMaxIterations || + // We have applied for enough time and the relative accuracy is good + // enough. Relative accuracy is checked only for user provided timers. + (HasSufficientTimeToApply(i) && + (!b.use_manual_time() || HasSufficientRelAccuracy(i))); } double BenchmarkRunner::GetMinTimeToApply() const { @@ -376,6 +389,26 @@ double BenchmarkRunner::GetMinTimeToApply() const { return warmup_done ? min_time : min_warmup_time; } +double BenchmarkRunner::GetRelAccuracy(const IterationResults& i) const { + return std::sqrt(i.seconds_pow2 - std::pow(i.seconds, 2.) / static_cast(i.iters)) / i.seconds; +} + +bool BenchmarkRunner::HasSufficientTimeToApply( + const IterationResults& i) const { + return i.seconds >= GetMinTimeToApply() || + // CPU time is specified but the elapsed real time greatly exceeds + // the minimum time. + // Note that user provided timers are except from this test. + (!b.use_manual_time() && + i.results.real_time_used >= 5 * GetMinTimeToApply()); +} + +bool BenchmarkRunner::HasSufficientRelAccuracy( + const IterationResults& i) const { + return (IsZero(GetMinRelAccuracy()) || + (GetRelAccuracy(i) <= GetMinRelAccuracy())); +} + void BenchmarkRunner::FinishWarmUp(const IterationCount& i) { warmup_done = true; iters = i; diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h index 6e5ceb31e0..13de62d95c 100644 --- a/src/benchmark_runner.h +++ b/src/benchmark_runner.h @@ -71,6 +71,8 @@ class BenchmarkRunner { double GetMinTime() const { return min_time; } + double GetMinRelAccuracy() const { return min_rel_accuracy; } + bool HasExplicitIters() const { return has_explicit_iteration_count; } IterationCount GetIters() const { return iters; } @@ -83,6 +85,7 @@ class BenchmarkRunner { BenchTimeType parsed_benchtime_flag; const double min_time; + const double min_rel_accuracy; const double min_warmup_time; bool warmup_done; const int repeats; @@ -104,6 +107,7 @@ class BenchmarkRunner { internal::ThreadManager::Result results; IterationCount iters; double seconds; + double seconds_pow2; }; IterationResults DoNIterations(); @@ -117,6 +121,12 @@ class BenchmarkRunner { double GetMinTimeToApply() const; + double GetRelAccuracy(const IterationResults& i) const; + + bool HasSufficientTimeToApply(const IterationResults& i) const; + + bool HasSufficientRelAccuracy(const IterationResults& i) const; + void FinishWarmUp(const IterationCount& i); void RunWarmUp(); diff --git a/src/thread_manager.h b/src/thread_manager.h index 819b3c44db..e3e6c5a2b8 100644 --- a/src/thread_manager.h +++ b/src/thread_manager.h @@ -41,6 +41,7 @@ class ThreadManager { double real_time_used = 0; double cpu_time_used = 0; double manual_time_used = 0; + double manual_time_used_pow2 = 0; int64_t complexity_n = 0; std::string report_label_; std::string skip_message_; diff --git a/src/thread_timer.h b/src/thread_timer.h index eb23f59561..ffe3c9f3ba 100644 --- a/src/thread_timer.h +++ b/src/thread_timer.h @@ -38,7 +38,10 @@ class ThreadTimer { } // Called by each thread - void SetIterationTime(double seconds) { manual_time_used_ += seconds; } + void SetIterationTime(double seconds) { + manual_time_used_ += seconds; + manual_time_used_pow2_ += std::pow(seconds, 2.); + } bool running() const { return running_; } @@ -60,6 +63,11 @@ class ThreadTimer { return manual_time_used_; } + double manual_time_used_pow2() const { + BM_CHECK(!running_); + return manual_time_used_pow2_; + } + private: double ReadCpuTimerOfChoice() const { if (measure_process_cpu_time) return ProcessCPUUsage(); @@ -78,6 +86,7 @@ class ThreadTimer { double cpu_time_used_ = 0; // Manually set iteration time. User sets this with SetIterationTime(seconds). double manual_time_used_ = 0; + double manual_time_used_pow2_ = 0; }; } // namespace internal diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 321e24d94b..853581634a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -97,6 +97,9 @@ benchmark_add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_ compile_benchmark_test(benchmark_min_time_flag_iters_test) benchmark_add_test(NAME min_time_flag_iters COMMAND benchmark_min_time_flag_iters_test) +compile_benchmark_test(benchmark_min_rel_accuracy_flag_test) +benchmark_add_test(NAME min_rel_accuracy_flag_test COMMAND benchmark_min_rel_accuracy_flag_test) + add_filter_test(filter_simple "Foo" 3) add_filter_test(filter_simple_negative "-Foo" 2) add_filter_test(filter_suffix "BM_.*" 4) diff --git a/test/benchmark_min_rel_accuracy_flag_test.cc b/test/benchmark_min_rel_accuracy_flag_test.cc new file mode 100644 index 0000000000..35606091a4 --- /dev/null +++ b/test/benchmark_min_rel_accuracy_flag_test.cc @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" + +// Tests that if a benchmark measures time manually, we can specify the required +// relative accuracy with --benchmark_min_rel_accuracy=. +namespace { + +class TestReporter : public benchmark::ConsoleReporter { + public: + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { + return ConsoleReporter::ReportContext(context); + }; + + virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { + assert(report.size() == 1); + iters_.push_back(report[0].iterations); + real_accumulated_times_.push_back(report[0].real_accumulated_time); + manual_accumulated_time_pow2s_.push_back( + report[0].manual_accumulated_time_pow2); + ConsoleReporter::ReportRuns(report); + }; + + TestReporter() {} + + virtual ~TestReporter() {} + + const std::vector& GetIters() const { + return iters_; + } + + const std::vector& GetRealAccumulatedTimes() const { + return real_accumulated_times_; + } + + const std::vector& GetManualAccumulatedTimePow2s() const { + return manual_accumulated_time_pow2s_; + } + + private: + std::vector iters_; + std::vector real_accumulated_times_; + std::vector manual_accumulated_time_pow2s_; +}; + +} // end namespace + +static void BM_MyBench(benchmark::State& state) { + static std::mt19937 rd{std::random_device{}()}; + static std::uniform_real_distribution mrand(0, 1); + + for (auto s : state) { + state.SetIterationTime(mrand(rd)); + } +} +BENCHMARK(BM_MyBench)->UseManualTime(); + +int main(int argc, char** argv) { + // Make a fake argv and append the new + // --benchmark_min_rel_accuracy= to it. + int fake_argc = argc + 2; + const char** fake_argv = new const char*[static_cast(fake_argc)]; + for (int i = 0; i < argc; ++i) fake_argv[i] = argv[i]; + fake_argv[argc] = "--benchmark_min_time=10s"; + fake_argv[argc + 1] = "--benchmark_min_rel_accuracy=0.01"; + + benchmark::Initialize(&fake_argc, const_cast(fake_argv)); + + TestReporter test_reporter; + const size_t returned_count = + benchmark::RunSpecifiedBenchmarks(&test_reporter, "BM_MyBench"); + assert(returned_count == 1); + + // Check the executed iters. + const auto iters = static_cast(test_reporter.GetIters()[0]); + const double real_accumulated_time = + test_reporter.GetRealAccumulatedTimes()[0]; + const double manual_accumulated_time_pow2 = + test_reporter.GetManualAccumulatedTimePow2s()[0]; + + const double rel_accuracy = + std::sqrt(manual_accumulated_time_pow2 / iters - + std::pow(real_accumulated_time / iters, 2.)) / + (real_accumulated_time / iters) / sqrt(iters); + assert(rel_accuracy <= 0.01); + + delete[] fake_argv; + return 0; +}