From be29a2fd913c6c5166ee9009602af9d69a7238df Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 2 Dec 2024 14:35:16 +0300 Subject: [PATCH] scheduler: Report long queue stall There can be at least two kinds of CPU stalls in seastar: task stall and queue stalls. The former is when a single task non-preemptively occupies a CPU for a too long period of time (configured by the option named blocked-reactor-notify-ms). The latter happens when the list of tasks to run happens to go above a certain threshold (configured by the option named max-task-backlog which is 1000 by default). In that case scheduler bites the buller and doesn't stop running tasks until the queue shrinks ignoring the need_preempt() value. The stall of the second kind can be detected by checking the "stalls" historgram in the "reactor" group and is also implicitly reported via time-spent-in-task-quota-voilation counter. However, it's good to have more information about this stall other than knowing that "bad things" had just happenned. The suggestion is to print which tasks are currently there in the queues by iterating the queue, collecting the typeif(*task).name()-s into a histogram and dumping it into logs. Signed-off-by: Pavel Emelyanov --- include/seastar/core/reactor.hh | 1 + src/core/reactor.cc | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/include/seastar/core/reactor.hh b/include/seastar/core/reactor.hh index 9296ced12b..ba17252a90 100644 --- a/include/seastar/core/reactor.hh +++ b/include/seastar/core/reactor.hh @@ -620,6 +620,7 @@ private: friend future<> seastar::destroy_scheduling_group(scheduling_group) noexcept; friend future<> seastar::rename_scheduling_group(scheduling_group sg, sstring new_name, sstring new_shortname) noexcept; friend future scheduling_group_key_create(scheduling_group_key_config cfg) noexcept; + friend seastar::internal::log_buf::inserter_iterator do_dump_task_queue(seastar::internal::log_buf::inserter_iterator it, const task_queue& tq); future fstatfs(int fd) noexcept; friend future> make_file_impl(int fd, file_open_options options, int flags, struct stat st) noexcept; diff --git a/src/core/reactor.cc b/src/core/reactor.cc index 8b7ba1ff24..faa96864f3 100644 --- a/src/core/reactor.cc +++ b/src/core/reactor.cc @@ -2578,6 +2578,21 @@ void reactor::register_metrics() { }); } +seastar::internal::log_buf::inserter_iterator do_dump_task_queue(seastar::internal::log_buf::inserter_iterator it, const reactor::task_queue& tq) { + memory::scoped_critical_alloc_section _; + std::unordered_map infos; + for (const auto& tp : tq._q) { + const std::type_info& ti = typeid(*tp); + auto [ it, ins ] = infos.emplace(std::make_pair(ti.name(), 0u)); + it->second++; + } + it = fmt::format_to(it, "Too long queue accumulated for {} ({} tasks)\n", tq._name, tq._q.size()); + for (auto& ti : infos) { + it = fmt::format_to(it, " {}: {}\n", ti.second, ti.first); + } + return it; +} + void reactor::run_tasks(task_queue& tq) { // Make sure new tasks will inherit our scheduling group *internal::current_scheduling_group_ptr() = scheduling_group(tq._id); @@ -2603,6 +2618,10 @@ void reactor::run_tasks(task_queue& tq) { // #302. reset_preemption_monitor(); lowres_clock::update(); + + static thread_local logger::rate_limit rate_limit(std::chrono::seconds(10)); + logger::lambda_log_writer writer([&tq] (auto it) { return do_dump_task_queue(it, tq); }); + seastar_logger.log(log_level::warn, rate_limit, writer); } } }