diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index c647d9036f40..5395b3884fb5 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -51,9 +51,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --upgrade pip && \ uv pip install -r requirements/cpu.txt -RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install intel-openmp==2024.2.1 intel_extension_for_pytorch==2.6.0 - ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so:$LD_PRELOAD" RUN echo 'ulimit -c 0' >> ~/.bashrc diff --git a/requirements/cpu.txt b/requirements/cpu.txt index d4191888382c..c064ecbb9b1f 100644 --- a/requirements/cpu.txt +++ b/requirements/cpu.txt @@ -20,3 +20,7 @@ datasets # for benchmark scripts # cpu cannot use triton 3.3.0 triton==3.2.0; platform_machine == "x86_64" + +# Intel Extension for PyTorch, only for x86_64 CPUs +intel-openmp; platform_machine == "x86_64" +intel_extension_for_pytorch==2.7.0; platform_machine == "x86_64" diff --git a/vllm/model_executor/layers/quantization/ipex_quant.py b/vllm/model_executor/layers/quantization/ipex_quant.py index b7baa3d3363b..8108c797637d 100644 --- a/vllm/model_executor/layers/quantization/ipex_quant.py +++ b/vllm/model_executor/layers/quantization/ipex_quant.py @@ -14,7 +14,7 @@ from vllm.model_executor.layers.quantization.gptq import GPTQLinearMethod from vllm.platforms import current_platform -MIN_IPEX_VERSION = "2.5.0" +MIN_IPEX_VERSION = "2.7.0" class IPEXConfig(QuantizationConfig):