From 94f24ed78741b70be0356560d85220913eb743a2 Mon Sep 17 00:00:00 2001 From: Enwei Zhu <21126786+syuoni@users.noreply.github.com> Date: Fri, 19 Sep 2025 10:54:31 +0000 Subject: [PATCH] fix Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> --- tensorrt_llm/_torch/pyexecutor/guided_decoder.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorrt_llm/_torch/pyexecutor/guided_decoder.py b/tensorrt_llm/_torch/pyexecutor/guided_decoder.py index c27c81f4b08..d280acf4c09 100644 --- a/tensorrt_llm/_torch/pyexecutor/guided_decoder.py +++ b/tensorrt_llm/_torch/pyexecutor/guided_decoder.py @@ -424,6 +424,11 @@ def __init__(self, dtype=torch.int32, pin_memory=True) + # torch.compile kernels are called with the GIL held; + # this can deadlock with CUDA callbacks into Python code. + # See: https://github.com/pytorch/pytorch/issues/163061 + torch.compiler.set_stance("force_eager") + @nvtx_range("GuidedDecoder.add_batch") def add_batch(self, scheduled_requests: ScheduledRequests,