diff --git a/python/sglang/srt/model_executor/piecewise_cuda_graph_runner.py b/python/sglang/srt/model_executor/piecewise_cuda_graph_runner.py index b1935d21c462..6f0ba95a44ff 100644 --- a/python/sglang/srt/model_executor/piecewise_cuda_graph_runner.py +++ b/python/sglang/srt/model_executor/piecewise_cuda_graph_runner.py @@ -609,7 +609,7 @@ def replay_prepare( buffers.input_ids[num_tokens:static_num_tokens].zero_() buffers.positions[num_tokens:static_num_tokens].zero_() if self.is_multimodal: - buffers.input_embeds[:, num_tokens:static_num_tokens].zero_() + buffers.input_embeds[num_tokens:static_num_tokens].zero_() if forward_batch.mrope_positions is not None: buffers.mrope_positions[:, num_tokens:static_num_tokens].zero_()