From 8d30812696b7b1344ed00276b9d97a96406401a9 Mon Sep 17 00:00:00 2001 From: Thomas Kowalski Date: Tue, 17 Feb 2026 17:01:14 +0100 Subject: [PATCH] fix(profiling): upper bound on iterations for `TaskInfo::unwind` (#16510) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Description This PR updates the Task unwinding logic for the Profiler to have an upper bound on the number of (1) Tasks unwound in the Task chain and (2) coroutines unwound in the coroutine chain. This is important because, if we somehow encounter memory corruption (very possible, as we don't take a snapshot of the interpreter memory but rather copy select "chunks" over time, and the state of Tasks can change as we copy those "chunks"), we could otherwise end up looping infinitely (which is bad for obvious reasons) and, as a result, try to add an infinite number of items to the Frame Stack (which is arguably significantly worse, as this would mean trying to allocate an infinite amount of memory 💣). We spotted this issue when we deployed `4.5.0rc2` to internal Rapid Python HTTP services; see IR-49542. 
Co-authored-by: thomas.kowalski (cherry picked from commit 0cfe067b01a81fd4ea886950eb02a9a05bbfdf17) Signed-off-by: Emmett Butler --- .../internal/datadog/profiling/stack/src/echion/tasks.cc | 9 ++++++++- .../datadog/profiling/stack/src/echion/threads.cc | 5 +++++ ...fix-max-iterations-unwind-tasks-671d743912c7d600.yaml | 4 ++++ 3 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/profiling-fix-max-iterations-unwind-tasks-671d743912c7d600.yaml diff --git a/ddtrace/internal/datadog/profiling/stack/src/echion/tasks.cc b/ddtrace/internal/datadog/profiling/stack/src/echion/tasks.cc index 15e03f9681a..57c008e75c1 100644 --- a/ddtrace/internal/datadog/profiling/stack/src/echion/tasks.cc +++ b/ddtrace/internal/datadog/profiling/stack/src/echion/tasks.cc @@ -170,9 +170,16 @@ TaskInfo::unwind(EchionSampler& echion, FrameStack& stack, bool using_uvloop) std::stack> coro_frames; // Unwind the coro chain + size_t coro_chain_depth = 0; for (auto py_coro = this->coro.get(); py_coro != NULL; py_coro = py_coro->await.get()) { - if (py_coro->frame != NULL) + coro_chain_depth++; + if (coro_chain_depth > MAX_RECURSION_DEPTH) { + break; + } + + if (py_coro->frame != NULL) { coro_frames.push(py_coro->frame); + } } // Total number of frames added to the Stack diff --git a/ddtrace/internal/datadog/profiling/stack/src/echion/threads.cc b/ddtrace/internal/datadog/profiling/stack/src/echion/threads.cc index 7e3751cbff7..646ce9c8bff 100644 --- a/ddtrace/internal/datadog/profiling/stack/src/echion/threads.cc +++ b/ddtrace/internal/datadog/profiling/stack/src/echion/threads.cc @@ -211,7 +211,12 @@ ThreadInfo::unwind_tasks(EchionSampler& echion, PyThreadState* tstate) auto stack_info = std::make_unique(leaf_task.get().name, leaf_task.get().is_on_cpu); auto& stack = stack_info->stack; + // Safety: prevent infinite loops from cycles in task chain maps + size_t task_chain_depth = 0; for (auto current_task = leaf_task;;) { + if (++task_chain_depth > 
MAX_RECURSION_DEPTH) { + break; + } auto& task = current_task.get(); auto task_stack_size = task.unwind(echion, stack, using_uvloop); diff --git a/releasenotes/notes/profiling-fix-max-iterations-unwind-tasks-671d743912c7d600.yaml b/releasenotes/notes/profiling-fix-max-iterations-unwind-tasks-671d743912c7d600.yaml new file mode 100644 index 00000000000..f3f0c6cfc94 --- /dev/null +++ b/releasenotes/notes/profiling-fix-max-iterations-unwind-tasks-671d743912c7d600.yaml @@ -0,0 +1,4 @@ +fixes: + - | + profiling: A bug where the Stack Profiler could loop infinitely (and allocate large amounts of memory, + leading to crashes) when sampling ``asyncio`` Tasks has been fixed.