From 94f24ed78741b70be0356560d85220913eb743a2 Mon Sep 17 00:00:00 2001
From: Enwei Zhu <21126786+syuoni@users.noreply.github.com>
Date: Fri, 19 Sep 2025 10:54:31 +0000
Subject: [PATCH] fix

Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com>
---
 tensorrt_llm/_torch/pyexecutor/guided_decoder.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorrt_llm/_torch/pyexecutor/guided_decoder.py b/tensorrt_llm/_torch/pyexecutor/guided_decoder.py
index c27c81f4b08..d280acf4c09 100644
--- a/tensorrt_llm/_torch/pyexecutor/guided_decoder.py
+++ b/tensorrt_llm/_torch/pyexecutor/guided_decoder.py
@@ -424,6 +424,11 @@ def __init__(self,
                                                dtype=torch.int32,
                                                pin_memory=True)
 
+        # torch.compile'd kernels are invoked while the GIL is held, which can
+        # deadlock with CUDA callbacks into Python code, so force eager mode.
+        # See: https://github.com/pytorch/pytorch/issues/163061
+        torch.compiler.set_stance("force_eager")
+
     @nvtx_range("GuidedDecoder.add_batch")
     def add_batch(self,
                   scheduled_requests: ScheduledRequests,