diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index 750616a9052..f884bbef0dd 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -359,6 +359,18 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: "needs to be equal if use pcp or dcp > 1 in P/D disaggregate and kv pool scenario." ) + # NOTE: vllm sets `speculative_config.enforce_eager` to True when using + # deepseek_v32 with mtp. Since we support graph mode, we reset it to + # False here. However, this workaround also silently overrides a user's + # explicit `speculative_config.enforce_eager` setting, so it must be + # removed once vllm supports this feature. + speculative_config = vllm_config.speculative_config + if model_config and speculative_config and \ + hasattr(model_config.hf_text_config, "model_type") and \ + model_config.hf_text_config.model_type == "deepseek_v32" and \ + speculative_config.enforce_eager: + speculative_config.enforce_eager = False + @classmethod def import_kernels(cls) -> None: # Directly importing vllm_ascend_C prevents ASCEND_RT_VISIBLE_DEVICES