Skip to content

Commit

Permalink
perf_tests: coroutinize main loop
Browse files Browse the repository at this point in the history
Simplify and coroutinize main loop. This reduces the duplicated portion
in the main loop which handles the 4 compile-time cases of

{void returning, size_t returning} x {sync, async}

test case methods. We duplicate only as much as need to keep the main
loop efficient but try to share the remainder. We can also remove a lot
of boilerplate which is no longer needed in C++.

This fixes #2587 because the underlying bug there was a change in
one of the duplicated sections but not the other.

Performance should be similar after this change: any benchmark that does
substantial work won't see a change. Very short benchmarks are
sensitive to the work in the main loop and this is slightly better in
some cases and slightly worse in others. This was evaluated with
perf_tests_perf benchmark in this series.

We delegate the choice of whether to yield in the benchmark not to the
code under test (in the future returning case).

Fixes #2587.
  • Loading branch information
travisdowns committed Dec 18, 2024
1 parent 6f6fc56 commit ff20df3
Showing 1 changed file with 35 additions and 65 deletions.
100 changes: 35 additions & 65 deletions include/seastar/testing/perf_tests.hh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

#include <fmt/format.h>

#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>
#include <seastar/core/loop.hh>
#include <seastar/testing/linux_perf_event.hh>
Expand Down Expand Up @@ -222,39 +223,17 @@ public:

extern time_measurement measure_time;

namespace {

template<bool Condition, typename TrueFn, typename FalseFn>
struct do_if_constexpr_ : FalseFn {
do_if_constexpr_(TrueFn, FalseFn false_fn) : FalseFn(std::move(false_fn)) { }
decltype(auto) operator()() const {
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64095
return FalseFn::operator()(0);
}
};
template<typename TrueFn, typename FalseFn>
struct do_if_constexpr_<true, TrueFn, FalseFn> : TrueFn {
do_if_constexpr_(TrueFn true_fn, FalseFn) : TrueFn(std::move(true_fn)) { }
decltype(auto) operator()() const { return TrueFn::operator()(0); }
};

template<bool Condition, typename TrueFn, typename FalseFn>
do_if_constexpr_<Condition, TrueFn, FalseFn> if_constexpr_(TrueFn&& true_fn, FalseFn&& false_fn)
{
return do_if_constexpr_<Condition, TrueFn, FalseFn>(std::forward<TrueFn>(true_fn),
std::forward<FalseFn>(false_fn));
}

}

template<typename Test>
class concrete_performance_test final : public performance_test {
std::optional<Test> _test;

using test_ret_type = decltype(_test->run());
// true iff the test method returns future<...>
static constexpr bool is_async_test = is_future<test_ret_type>::value;
// true iff the test returns the number of iterations run, otherwise it returns
// void and we consider each invocation to be 1 iteration
static constexpr bool is_iteration_returning = !(std::is_same_v<test_ret_type, future<>> || std::is_void_v<test_ret_type>);
private:
template<typename... Args>
auto run_test(Args&&...) {
return _test->run();
}

protected:
virtual void set_up() override {
Expand All @@ -267,45 +246,36 @@ protected:

[[gnu::hot]]
virtual future<run_result> do_single_run() override {
// Redundant 'this->'s courtesy of https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61636
_instructions_retired_counter.enable();
_cpu_cycles_retired_counter.enable();
return if_constexpr_<is_future<decltype(_test->run())>::value>([&] (auto&&...) {
measure_time.start_run(&_instructions_retired_counter);
return do_until([this] { return this->stop_iteration(); }, [this] {
return if_constexpr_<std::is_same_v<decltype(_test->run()), future<>>>([&] (auto&&...) {
this->next_iteration(1);
return _test->run();
}, [&] (auto&&... dependency) {
// We need `dependency` to make sure the compiler won't be able to instantiate anything
// (and notice that the code does not compile) if this part of if_constexpr_ is not active.
return run_test(dependency...).then([&] (size_t n) {
this->next_iteration(n);
});
})();
}).then([] {
return measure_time.stop_run();
}).finally([this] {
_instructions_retired_counter.disable();
_cpu_cycles_retired_counter.disable();
});
}, [&] (auto&&...) {
measure_time.start_run(&_instructions_retired_counter, &_cpu_cycles_retired_counter);
while (!stop_iteration()) {
if_constexpr_<std::is_void_v<decltype(_test->run())>>([&] (auto&&...) {
(void)_test->run();
this->next_iteration(1);
}, [&] (auto&&... dependency) {
// We need `dependency` to make sure the compiler won't be able to instantiate anything
// (and notice that the code does not compile) if this part of if_constexpr_ is not active.
this->next_iteration(run_test(dependency...));
})();
measure_time.start_run(&_instructions_retired_counter, &_cpu_cycles_retired_counter);
while (!stop_iteration()) {
if constexpr (is_async_test) {
if constexpr (is_iteration_returning) {
auto f = _test->run();
next_iteration(f.available() ? std::move(f).get() : co_await std::move(f));
} else {
auto f = _test->run();
// The available() check is functionally redundant, but is significantly faster
// than invoking the co_await machinery on a future-returning function.
if (!f.available()) {
co_await std::move(f);
}
next_iteration(1);
}
} else {
if constexpr (is_iteration_returning) {
next_iteration(_test->run());
} else {
_test->run();
next_iteration(1);
}
}
auto ret = measure_time.stop_run();
_instructions_retired_counter.disable();
_cpu_cycles_retired_counter.disable();
return make_ready_future<run_result>(std::move(ret));
})();
}
auto ret = measure_time.stop_run();
_instructions_retired_counter.disable();
_cpu_cycles_retired_counter.disable();
co_return ret;
}
public:
using performance_test::performance_test;
Expand Down

0 comments on commit ff20df3

Please sign in to comment.