5 changes: 2 additions & 3 deletions vllm_ascend/compilation/acl_graph.py
@@ -186,13 +186,12 @@ def __call__(self, *args, **kwargs):
)

logger.info_once("Replaying aclgraph")
# In async scheduling or multi-threaded (MT) scenarios when graph mode is FULL, it is possible that
# In async scheduling or multi-threaded (MT) scenarios, it is possible that
# the CPU's record event (from update_attn_params) for the iteration i completes
# before the graph replay of iteration i-1.
# To ensure proper ordering, we must call synchronize here before replaying,
# so that update_attn_params only executes after the previous graph replay has fully completed.
if self.runtime_mode == CUDAGraphMode.FULL:
torch.npu.synchronize()
torch.npu.synchronize()
Contributor

high

While making the synchronization unconditional correctly addresses a potential race condition, using torch.npu.synchronize() can introduce a significant performance bottleneck as it stalls the CPU and waits for all kernels on the device to complete. A more performant approach would be to use explicit event-based synchronization. For instance, you could record an event after the update_attn_params call in the previous iteration and have the current iteration's stream wait for that specific event before replaying the graph. This would avoid a full device-wide synchronization and improve overall throughput.
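The ordering the comment describes can be sketched on the CPU side. The snippet below simulates it with `threading.Event` standing in for device events (on the device, the equivalents would be something like `torch.npu.Event.record()` after the replay and `stream.wait_event(...)` before `update_attn_params`); all names here are illustrative and not taken from the PR. Each iteration's update/replay worker blocks on the previous iteration's "replay done" event, so even when workers are launched out of order, updates for iteration i never run before the replay of iteration i-1 completes.

```python
import threading

log = []
lock = threading.Lock()

def run(n_iters=3):
    # Iteration -1 is trivially "done", so iteration 0 can start at once.
    prev_replay_done = threading.Event()
    prev_replay_done.set()
    threads = []
    for i in range(n_iters):
        replay_done = threading.Event()

        def worker(i=i, wait_on=prev_replay_done, done=replay_done):
            # Analogous to stream.wait_event(prev_event): block until the
            # previous iteration's replay has fully completed.
            wait_on.wait()
            with lock:
                log.append(("update_attn_params", i))
                log.append(("replay", i))
            # Analogous to event.record() after the graph replay.
            done.set()

        threads.append(threading.Thread(target=worker))
        prev_replay_done = replay_done

    # Start workers in reverse order to exercise the ordering guarantee.
    for t in reversed(threads):
        t.start()
    for t in threads:
        t.join()

run()
# log is strictly interleaved per iteration:
# update_attn_params:0, replay:0, update_attn_params:1, replay:1, ...
```

Unlike `torch.npu.synchronize()`, which drains every outstanding kernel on the device, this pattern only orders the two operations that actually race, which is why the reviewer expects it to recover throughput.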

entry.aclgraph.replay()
return entry.output
