diff --git a/tensorrt_llm/_torch/pyexecutor/cuda_graph_runner.py b/tensorrt_llm/_torch/pyexecutor/cuda_graph_runner.py index d924f2ea457..b8e2754a9cb 100644 --- a/tensorrt_llm/_torch/pyexecutor/cuda_graph_runner.py +++ b/tensorrt_llm/_torch/pyexecutor/cuda_graph_runner.py @@ -389,6 +389,7 @@ def _get_padded_batch(self, batch: ScheduledRequests, if spec_res_mgr: spec_res_mgr.add_dummy_requests([CUDA_GRAPH_DUMMY_REQUEST_ID]) + self.padding_dummy_request.py_draft_tokens = [0] * runtime_draft_len batch.generation_requests.extend([self.padding_dummy_request] * padding_size) return padding_size