vllm-project · wangxiyuan · Jan 30, 2026 · Jan 30, 2026
diff --git a/vllm_ascend/patch/worker/patch_model_runner.py b/vllm_ascend/patch/worker/patch_model_runner.py
@@ -275,7 +275,7 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
         # request state. NOTE: The spec tokens are placeholders and not
         # added to token_ids_cpu.
         if self.is_kv_consumer and self.speculative_config and \
-            self.speculative_config.method == "mtp" and self.use_async_scheduling:
+            self.use_async_scheduling:
             req_state = self.requests[request.req_id]
             spec_token_ids = scheduler_output.scheduled_spec_decode_tokens.get(
                 request.req_id, [])

diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py
@@ -336,8 +336,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
             # When open PD separation + MTP + Full Graph + asynchronous scheduling,
             # patch gpu_model_runner._update_states for solving
             # decode nodes may be in eager mode.
-            if vllm_config.speculative_config and vllm_config.kv_transfer_config and \
-               vllm_config.speculative_config.method == "mtp":
+            if vllm_config.speculative_config and vllm_config.kv_transfer_config:
                 import vllm_ascend.patch.worker.patch_model_runner  # noqa
 
         # Extend original scheduler_config to use SchedulerDynamicBatch.