diff --git a/vllm_ascend/patch/worker/__init__.py b/vllm_ascend/patch/worker/__init__.py index e9bf8b0d17a..537752e875e 100644 --- a/vllm_ascend/patch/worker/__init__.py +++ b/vllm_ascend/patch/worker/__init__.py @@ -29,6 +29,7 @@ import vllm_ascend.patch.worker.patch_deepseek # noqa import vllm_ascend.patch.worker.patch_multimodal_merge # noqa import vllm_ascend.patch.worker.patch_minicpm # noqa +import vllm_ascend.patch.worker.patch_model_runner # noqa import vllm_ascend.patch.worker.patch_rope # noqa import vllm_ascend.patch.worker.patch_qwen3_next # noqa import vllm_ascend.patch.worker.patch_qwen3_next_mtp # noqa diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index 21746346c19..db9e7c83147 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -333,12 +333,6 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: vllm_config) vllm_config.scheduler_config = recompute_scheduler_config - # When open PD separation + MTP + Full Graph + asynchronous scheduling, - # patch gpu_model_runner._update_states for solving - # decode nodes may be in eager mode. - if vllm_config.speculative_config and vllm_config.kv_transfer_config: - import vllm_ascend.patch.worker.patch_model_runner # noqa - # Extend original scheduler_config to use SchedulerDynamicBatch. if ascend_config.SLO_limits_for_dynamic_batch != -1: vllm_config.scheduler_config.scheduler_cls = (