[Fix] Refactors capturing flag initialization to fix error in eager mode

yiz-liu · yiz-liu · commit e4ea6391d2fb · 2025-07-07T11:06:37.000+08:00
Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
diff --git a/vllm_ascend/ascend_forward_context.py b/vllm_ascend/ascend_forward_context.py
@@ -55,6 +55,10 @@ def set_ascend_forward_context(
 
         forward_context.in_profile_run = in_profile_run
 
+        # NOTE: This cannot be set using set_forward_context
+        # due to multiple warmups before actual capturing
+        forward_context.capturing = False
+
         dp_world_size = get_dp_group().world_size
         if dp_world_size > 1 and forward_context.dp_metadata is not None:
             forward_context.max_tokens_across_dp = forward_context.dp_metadata.max_tokens_across_dp_cpu.item(
diff --git a/vllm_ascend/compilation/piecewise_backend.py b/vllm_ascend/compilation/piecewise_backend.py
@@ -157,7 +157,6 @@ def update_attn_params(self, graph_params, forward_context, runtime_shape):
     def __call__(self, *args) -> Any:
         forward_context = get_forward_context()
         graph_params = get_graph_params()
-        forward_context.capturing = False
 
         if not self.first_run_finished:
             self.first_run_finished = True