diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py index 454d2301ec3c..c97c3297e1ba 100644 --- a/vllm/platforms/xpu.py +++ b/vllm/platforms/xpu.py @@ -201,9 +201,6 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: if vllm_config.lora_config is not None: compilation_config.mode = CompilationMode.NONE - # decrease triton kernel compilation scratch space for speculative decoding - if vllm_config.speculative_config is not None: - os.environ["IGC_ForceOCLSIMDWidth"] = "16" # noqa: SIM112 # check and update parallel config parallel_config = vllm_config.parallel_config # Only override worker_cls if it's still the default "auto"