diff --git a/vllm_ascend/compilation/acl_graph.py b/vllm_ascend/compilation/acl_graph.py index ed8673d21a0..93c32ce708e 100644 --- a/vllm_ascend/compilation/acl_graph.py +++ b/vllm_ascend/compilation/acl_graph.py @@ -196,7 +196,7 @@ def __call__(self, *args, **kwargs): else False ) if self.runtime_mode != CUDAGraphMode.FULL or not forward_context.is_draft_model or not use_eagle: - torch.npu.synchronize() + torch.npu.current_stream().synchronize() entry.aclgraph.replay() return entry.output