3333// see https://semver.org/
3434#define ANKERL_NANOBENCH_VERSION_MAJOR 4 // incompatible API changes
3535#define ANKERL_NANOBENCH_VERSION_MINOR 3 // backwards-compatible changes
36- #define ANKERL_NANOBENCH_VERSION_PATCH 4 // backwards-compatible bug fixes
36+ #define ANKERL_NANOBENCH_VERSION_PATCH 6 // backwards-compatible bug fixes
3737
3838// /////////////////////////////////////////////////////////////////////////////////////////////////
3939// public facing api - as minimal as possible
8888 } while (0 )
8989#endif
9090
91- #if defined(__linux__) && defined(PERF_EVENT_IOC_ID) && defined(PERF_COUNT_HW_REF_CPU_CYCLES) && defined(PERF_FLAG_FD_CLOEXEC) && \
92- !defined (ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS)
93- // only enable perf counters on kernel 3.14 which seems to have all the necessary defines. The three PERF_... defines are not in
94- // kernel 2.6.32 (all others are).
95- # define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS () 1
96- #else
97- # define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS () 0
91+ #define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS () 0
92+ #if defined(__linux__) && !defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS)
93+ # include < linux/version.h>
94+ # if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
95+ // PERF_COUNT_HW_REF_CPU_CYCLES only available since kernel 3.3
96+ // PERF_FLAG_FD_CLOEXEC since kernel 3.14
97+ # undef ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS
98+ # define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS () 1
99+ # endif
98100#endif
99101
100102#if defined(__clang__)
@@ -2210,20 +2212,20 @@ struct IterationLogic::Impl {
22102212 columns.emplace_back (10 , 1 , " err%" , " %" , rErrorMedian * 100.0 );
22112213
22122214 double rInsMedian = -1.0 ;
2213- if (mResult .has (Result::Measure::instructions)) {
2215+ if (mBench . performanceCounters () && mResult .has (Result::Measure::instructions)) {
22142216 rInsMedian = mResult .median (Result::Measure::instructions);
22152217 columns.emplace_back (18 , 2 , " ins/" + mBench .unit (), " " , rInsMedian / mBench .batch ());
22162218 }
22172219
22182220 double rCycMedian = -1.0 ;
2219- if (mResult .has (Result::Measure::cpucycles)) {
2221+ if (mBench . performanceCounters () && mResult .has (Result::Measure::cpucycles)) {
22202222 rCycMedian = mResult .median (Result::Measure::cpucycles);
22212223 columns.emplace_back (18 , 2 , " cyc/" + mBench .unit (), " " , rCycMedian / mBench .batch ());
22222224 }
22232225 if (rInsMedian > 0.0 && rCycMedian > 0.0 ) {
22242226 columns.emplace_back (9 , 3 , " IPC" , " " , rCycMedian <= 0.0 ? 0.0 : rInsMedian / rCycMedian);
22252227 }
2226- if (mResult .has (Result::Measure::branchinstructions)) {
2228+ if (mBench . performanceCounters () && mResult .has (Result::Measure::branchinstructions)) {
22272229 double rBraMedian = mResult .median (Result::Measure::branchinstructions);
22282230 columns.emplace_back (17 , 2 , " bra/" + mBench .unit (), " " , rBraMedian / mBench .batch ());
22292231 if (mResult .has (Result::Measure::branchmisses)) {
@@ -2402,6 +2404,14 @@ class LinuxPerformanceCounters {
24022404 return (a + divisor / 2 ) / divisor;
24032405 }
24042406
2407+ ANKERL_NANOBENCH_NO_SANITIZE (" integer" , " undefined" )
2408+ static inline uint32_t mix (uint32_t x) noexcept {
2409+ x ^= x << 13 ;
2410+ x ^= x >> 17 ;
2411+ x ^= x << 5 ;
2412+ return x;
2413+ }
2414+
24052415 template <typename Op>
24062416 ANKERL_NANOBENCH_NO_SANITIZE (" integer" , " undefined" )
24072417 void calibrate (Op&& op) {
@@ -2441,15 +2451,10 @@ class LinuxPerformanceCounters {
24412451 uint64_t const numIters = 100000U + (std::random_device{}() & 3 );
24422452 uint64_t n = numIters;
24432453 uint32_t x = 1234567 ;
2444- auto fn = [&]() {
2445- x ^= x << 13 ;
2446- x ^= x >> 17 ;
2447- x ^= x << 5 ;
2448- };
24492454
24502455 beginMeasure ();
24512456 while (n-- > 0 ) {
2452- fn ( );
2457+ x = mix (x );
24532458 }
24542459 endMeasure ();
24552460 detail::doNotOptimizeAway (x);
@@ -2459,8 +2464,8 @@ class LinuxPerformanceCounters {
24592464 beginMeasure ();
24602465 while (n-- > 0 ) {
24612466 // we now run *twice* so we can easily calculate the overhead
2462- fn ( );
2463- fn ( );
2467+ x = mix (x );
2468+ x = mix (x );
24642469 }
24652470 endMeasure ();
24662471 detail::doNotOptimizeAway (x);
0 commit comments