From c5a50857520837573a4bc20a7b27e238a8210c68 Mon Sep 17 00:00:00 2001 From: Adam Ghandoura Date: Fri, 13 Feb 2026 12:53:33 +0100 Subject: [PATCH 1/7] ubi: add pip check and support synapse revision latest Use --no-cache-dir for pip installs to reduce image size. Run pip check during build to validate Python dependencies. Allow SYNAPSE_REVISION as exact value (e.g. 695) or latest with revision detection. Signed-off-by: Adam Ghandoura --- .cd/Dockerfile.rhel.ubi.vllm | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/.cd/Dockerfile.rhel.ubi.vllm b/.cd/Dockerfile.rhel.ubi.vllm index 70971773bf..b25fabf33a 100644 --- a/.cd/Dockerfile.rhel.ubi.vllm +++ b/.cd/Dockerfile.rhel.ubi.vllm @@ -4,6 +4,7 @@ # Global build arguments - declared before first FROM to be available in all stages ARG ARTIFACTORY_URL="vault.habana.ai" ARG SYNAPSE_VERSION=1.23.0 +# Use an exact revision (e.g. 695) or "latest" to auto-detect the newest available revision. ARG SYNAPSE_REVISION=695 ARG BASE_NAME=rhel9.6 ARG OS_VERSION=9.6 @@ -83,8 +84,8 @@ RUN alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 2 && \ ln -s /usr/bin/python3 /usr/bin/python # Install base Python packages -RUN pip install setuptools==79.0.1 wheel setuptools_scm && \ - pip install --upgrade Jinja2 protobuf urllib3 requests +RUN pip install --no-cache-dir setuptools==79.0.1 wheel setuptools_scm && \ + pip install --no-cache-dir --upgrade Jinja2 protobuf urllib3 requests # Setup Habana repository and install Habana packages RUN printf "[habanalabs]\\nname=Habana RH9 Linux repo\\nbaseurl=https://${ARTIFACTORY_URL}/artifactory/${HABANA_RPM_REPO_PATH}\\ngpgkey=https://${ARTIFACTORY_URL}/artifactory/${HABANA_RPM_REPO_PATH}/repodata/repomd.xml.key\\ngpgcheck=1\\n" > /etc/yum.repos.d/habanalabs.repo && \ @@ -96,17 +97,30 @@ RUN printf "[habanalabs]\\nname=Habana RH9 Linux repo\\nbaseurl=https://${ARTIFA rpm --import "${_GPG_TEMP}/habana_pubkey" && \ rm -rf 
"${_GPG_TEMP}" && \ dnf makecache && \ + if [ "${SYNAPSE_REVISION}" = "latest" ]; then \ + dnf install -y \ + habanalabs-rdma-core-"$SYNAPSE_VERSION"* \ + habanalabs-thunk-"$SYNAPSE_VERSION"* \ + habanalabs-firmware-tools-"$SYNAPSE_VERSION"* \ + habanalabs-graph-"$SYNAPSE_VERSION"*; \ + else \ dnf install -y \ habanalabs-rdma-core-"$SYNAPSE_VERSION"-"$SYNAPSE_REVISION"* \ habanalabs-thunk-"$SYNAPSE_VERSION"-"$SYNAPSE_REVISION"* \ habanalabs-firmware-tools-"$SYNAPSE_VERSION"-"$SYNAPSE_REVISION"* \ - habanalabs-graph-"$SYNAPSE_VERSION"-"$SYNAPSE_REVISION"* && \ + habanalabs-graph-"$SYNAPSE_VERSION"-"$SYNAPSE_REVISION"*; \ + fi && \ + DETECTED_SYNAPSE_REVISION=$(rpm -q --qf '%{RELEASE}\n' habanalabs-rdma-core | head -n1 | sed 's/\..*$//') && \ + mkdir -p /etc/habanalabs && \ + echo "${DETECTED_SYNAPSE_REVISION}" > /etc/habanalabs/synapse_revision && \ + echo "Detected Synapse revision: ${DETECTED_SYNAPSE_REVISION}" && \ dnf clean all && \ chmod +t /var/log/habana_logs && \ rm -f /etc/yum.repos.d/habanalabs.repo # Install Habana media loader and configure Python path -RUN pip install habana-media-loader=="${SYNAPSE_VERSION}"."${SYNAPSE_REVISION}" --extra-index-url ${PYPI_INDEX_URL} && \ +RUN SYNAPSE_REVISION_RUNTIME=$(cat /etc/habanalabs/synapse_revision) && \ + pip install --no-cache-dir habana-media-loader=="${SYNAPSE_VERSION}"."${SYNAPSE_REVISION_RUNTIME}" --extra-index-url ${PYPI_INDEX_URL} && \ echo "/usr/lib/habanalabs" > $(python3 -c "import sysconfig; print(sysconfig.get_path('platlib'))")/habanalabs-graph.pth # ============================================================================ @@ -129,14 +143,15 @@ ARG TORCH_TYPE ENV OS_STRING="${OS_STRING}" # Use installer script from Habana to install Pytorch -RUN PT_PACKAGE_NAME="pytorch_modules${PT_PACKAGE_NAME_NON_DEFAULT_PYTHON_SUBSTRING}-v${PT_VERSION}_${SYNAPSE_VERSION}_${SYNAPSE_REVISION}.tgz" && \ - 
PT_ARTIFACT_PATH="https://${ARTIFACTORY_URL}/artifactory/${PT_MODULES_REPO_NAME}/${SYNAPSE_VERSION}/${SYNAPSE_REVISION}/pytorch/${OS_STRING}" && \ +RUN SYNAPSE_REVISION_RUNTIME=$(cat /etc/habanalabs/synapse_revision) && \ + PT_PACKAGE_NAME="pytorch_modules${PT_PACKAGE_NAME_NON_DEFAULT_PYTHON_SUBSTRING}-v${PT_VERSION}_${SYNAPSE_VERSION}_${SYNAPSE_REVISION_RUNTIME}.tgz" && \ + PT_ARTIFACT_PATH="https://${ARTIFACTORY_URL}/artifactory/${PT_MODULES_REPO_NAME}/${SYNAPSE_VERSION}/${SYNAPSE_REVISION_RUNTIME}/pytorch/${OS_STRING}" && \ TMP_PATH=$(mktemp --directory) && \ wget --no-verbose "${PT_ARTIFACT_PATH}/${PT_PACKAGE_NAME}" && \ tar -zxf "${PT_PACKAGE_NAME}" -C "${TMP_PATH}" && \ cd "${TMP_PATH}" && \ export SKIP_INSTALL_DEPENDENCIES=1 && \ - PYTHON_INDEX_URL="--extra-index-url ${PYPI_INDEX_URL}" ./install.sh $SYNAPSE_VERSION $SYNAPSE_REVISION $TORCH_TYPE && \ + PYTHON_INDEX_URL="--extra-index-url ${PYPI_INDEX_URL}" ./install.sh $SYNAPSE_VERSION $SYNAPSE_REVISION_RUNTIME $TORCH_TYPE && \ cd / && \ rm -rf "${TMP_PATH}" "${PT_PACKAGE_NAME}" @@ -188,11 +203,12 @@ RUN set -e && \ git remote add upstream https://github.com/vllm-project/vllm.git && \ git fetch upstream --tags && \ git checkout ${VLLM_PROJECT_COMMIT} && \ - pip install -r <(sed '/^torch/d' requirements/build.txt) && \ - VLLM_TARGET_DEVICE=empty pip install --no-build-isolation . && \ + pip install --no-cache-dir -r <(sed '/^torch/d' requirements/build.txt) && \ + VLLM_TARGET_DEVICE=empty pip install --no-cache-dir --no-build-isolation . && \ cd $VLLM_PATH2 && \ git checkout ${VLLM_GAUDI_COMMIT} && \ - VLLM_TARGET_DEVICE=hpu pip install -v . --no-build-isolation + VLLM_TARGET_DEVICE=hpu pip install --no-cache-dir -v . 
--no-build-isolation && \ + pip check # The scripts below are used for benchmarks testing and autocalc: # RUN pip3 install -v -e $VLLM_PATH/tests/vllm_test_utils From 453108f77f2ab1e08ab1d5fc022e0fb7fce0b833 Mon Sep 17 00:00:00 2001 From: Adam Ghandoura Date: Fri, 13 Feb 2026 13:25:56 +0100 Subject: [PATCH 2/7] ubi image: add comment about image label Signed-off-by: Adam Ghandoura --- .cd/Dockerfile.rhel.ubi.vllm | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.cd/Dockerfile.rhel.ubi.vllm b/.cd/Dockerfile.rhel.ubi.vllm index b25fabf33a..4cdc4ac7b8 100644 --- a/.cd/Dockerfile.rhel.ubi.vllm +++ b/.cd/Dockerfile.rhel.ubi.vllm @@ -5,6 +5,7 @@ ARG ARTIFACTORY_URL="vault.habana.ai" ARG SYNAPSE_VERSION=1.23.0 # Use an exact revision (e.g. 695) or "latest" to auto-detect the newest available revision. +# If you do not want metadata to show "latest", pass an exact SYNAPSE_REVISION. ARG SYNAPSE_REVISION=695 ARG BASE_NAME=rhel9.6 ARG OS_VERSION=9.6 @@ -36,7 +37,9 @@ ARG PYPI_INDEX_URL ARG HABANA_RPM_REPO_PATH ARG TORCH_TYPE -# Labels for RHEL certification +# Labels for RHEL certification. +# Note: when SYNAPSE_REVISION=latest, this metadata intentionally records "latest" +# (the requested input), not the detected numeric revision. LABEL vendor="Habanalabs Ltd." 
\ release="${SYNAPSE_VERSION}-${SYNAPSE_REVISION}" From 41a2cb369626860c8ce1f885601ebd672b121bf3 Mon Sep 17 00:00:00 2001 From: Adam Ghandoura Date: Fri, 13 Feb 2026 16:32:59 +0100 Subject: [PATCH 3/7] remove crb repo and add allow-erasing Signed-off-by: Adam Ghandoura --- .cd/Dockerfile.rhel.ubi.vllm | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.cd/Dockerfile.rhel.ubi.vllm b/.cd/Dockerfile.rhel.ubi.vllm index 4cdc4ac7b8..a949bce0aa 100644 --- a/.cd/Dockerfile.rhel.ubi.vllm +++ b/.cd/Dockerfile.rhel.ubi.vllm @@ -57,20 +57,19 @@ ENV TORCH_TYPE=${TORCH_TYPE} \ COPY LICENSE /licenses/ # System setup - Remove FIPS provider and add EPEL -RUN dnf install -y python3-dnf-plugin-versionlock && \ +RUN dnf install -y --allowerasing python3-dnf-plugin-versionlock && \ dnf versionlock add redhat-release* && \ # '|| true' is added to support RHEL 9.4 in which openssl-fips-provider-so is not installed rpm -e --nodeps openssl-fips-provider-so || true && \ - dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \ + dnf install -y --allowerasing https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \ dnf clean all # Add CentOS repositories for additional packages RUN printf "[BaseOS]\\nname=CentOS Linux 9 - BaseOS\\nbaseurl=https://mirror.stream.centos.org/9-stream/BaseOS/x86_64/os\\ngpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256\\ngpgcheck=1\\n" > /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \ - printf "[centos9]\\nname=CentOS Linux 9 - AppStream\\nbaseurl=https://mirror.stream.centos.org/9-stream/AppStream/x86_64/os\\ngpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256\\ngpgcheck=1\\n" > /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \ - printf "[CRB]\\nname=CentOS Linux 9 - CRB\\nbaseurl=https://mirror.stream.centos.org/9-stream/CRB/x86_64/os\\ngpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256\\ngpgcheck=1\\n" > 
/etc/yum.repos.d/CentOS-Linux-CRB.repo + printf "[centos9]\\nname=CentOS Linux 9 - AppStream\\nbaseurl=https://mirror.stream.centos.org/9-stream/AppStream/x86_64/os\\ngpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256\\ngpgcheck=1\\n" > /etc/yum.repos.d/CentOS-Linux-AppStream.repo # Install system dependencies -RUN dnf install -y \ +RUN dnf install -y --allowerasing \ wget git jq libomp \ # Image processing dependencies (needed for pillow-simd -> habana-media-loader) zlib-devel libjpeg-devel \ @@ -181,7 +180,7 @@ ENV BASE_NAME=${BASE_NAME} \ VLLM_PATH2=/workspace/vllm-gaudi # Install additional system dependencies -RUN dnf update -y --nobest && \ +RUN dnf update -y --nobest --allowerasing --skip-broken && \ dnf install -y gettext jq git --allowerasing && \ ln -sf /usr/bin/python3 /usr/bin/python && \ dnf clean all @@ -239,3 +238,4 @@ COPY LICENSE /licenses/vllm.md USER 2000 ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"] + From 310ece6ff92f4cfedacaac177f9feb1b5db00f3f Mon Sep 17 00:00:00 2001 From: Adam Ghandoura Date: Wed, 18 Feb 2026 16:31:15 +0100 Subject: [PATCH 4/7] more fixes Signed-off-by: Adam Ghandoura --- .cd/Dockerfile.rhel.ubi.vllm | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/.cd/Dockerfile.rhel.ubi.vllm b/.cd/Dockerfile.rhel.ubi.vllm index 260c333676..f0bd096638 100644 --- a/.cd/Dockerfile.rhel.ubi.vllm +++ b/.cd/Dockerfile.rhel.ubi.vllm @@ -57,7 +57,8 @@ ENV TORCH_TYPE=${TORCH_TYPE} \ COPY LICENSE /licenses/ # System setup - Remove FIPS provider and add EPEL -RUN dnf install -y --allowerasing python3-dnf-plugin-versionlock && \ +RUN dnf -y update && \ + dnf install -y --allowerasing python3-dnf-plugin-versionlock && \ dnf versionlock add redhat-release* && \ # '|| true' is added to support RHEL 9.4 in which openssl-fips-provider-so is not installed rpm -e --nodeps openssl-fips-provider-so || true && \ @@ -72,7 +73,7 @@ RUN printf "[BaseOS]\\nname=CentOS Linux 9 - 
BaseOS\\nbaseurl=https://mirror.str RUN dnf install -y --allowerasing \ wget git jq libomp \ # Image processing dependencies (needed for pillow-simd -> habana-media-loader) - zlib-devel libjpeg-devel \ + zlib-devel libjpeg-turbo-devel \ # habanalabs-thunk dependency libfdt-devel \ # Python development @@ -157,10 +158,7 @@ RUN SYNAPSE_REVISION_RUNTIME=$(cat /etc/habanalabs/synapse_revision) && \ export SKIP_INSTALL_DEPENDENCIES=1 && \ PYTHON_INDEX_URL="--extra-index-url ${PYPI_INDEX_URL}" ./install.sh $SYNAPSE_VERSION $SYNAPSE_REVISION_RUNTIME $TORCH_TYPE && \ cd / && \ - rm -rf "${TMP_PATH}" "${PT_PACKAGE_NAME}" - -# System update -RUN dnf -y update --nobest --allowerasing --skip-broken && \ + rm -rf "${TMP_PATH}" "${PT_PACKAGE_NAME}" && \ dnf clean all WORKDIR /workspace @@ -182,10 +180,9 @@ ENV BASE_NAME=${BASE_NAME} \ VLLM_PATH2=/workspace/vllm-gaudi # Install additional system dependencies -RUN dnf update -y --nobest --allowerasing --skip-broken && \ - dnf install -y gettext jq git --allowerasing && \ - ln -sf /usr/bin/python3 /usr/bin/python && \ - dnf clean all +RUN dnf install -y gettext jq git --allowerasing && \ + dnf clean all && \ + ln -sf /usr/bin/python3 /usr/bin/python WORKDIR /root From bf22daf3d1d03f61db4b1b9b07d4823a381699fb Mon Sep 17 00:00:00 2001 From: Adam Ghandoura Date: Wed, 18 Feb 2026 17:15:34 +0100 Subject: [PATCH 5/7] add crb back because of libfdt-devel Signed-off-by: Adam Ghandoura --- .cd/Dockerfile.rhel.ubi.vllm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.cd/Dockerfile.rhel.ubi.vllm b/.cd/Dockerfile.rhel.ubi.vllm index f0bd096638..8ee32a1b24 100644 --- a/.cd/Dockerfile.rhel.ubi.vllm +++ b/.cd/Dockerfile.rhel.ubi.vllm @@ -67,7 +67,8 @@ RUN dnf -y update && \ # Add CentOS repositories for additional packages RUN printf "[BaseOS]\\nname=CentOS Linux 9 - 
BaseOS\\nbaseurl=https://mirror.stream.centos.org/9-stream/BaseOS/x86_64/os\\ngpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256\\ngpgcheck=1\\n" > /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \ - printf "[centos9]\\nname=CentOS Linux 9 - AppStream\\nbaseurl=https://mirror.stream.centos.org/9-stream/AppStream/x86_64/os\\ngpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256\\ngpgcheck=1\\n" > /etc/yum.repos.d/CentOS-Linux-AppStream.repo + printf "[centos9]\\nname=CentOS Linux 9 - AppStream\\nbaseurl=https://mirror.stream.centos.org/9-stream/AppStream/x86_64/os\\ngpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256\\ngpgcheck=1\\n" > /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \ + printf "[CRB]\\nname=CentOS Linux 9 - CRB\\nbaseurl=https://mirror.stream.centos.org/9-stream/CRB/x86_64/os\\ngpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256\\ngpgcheck=1\\n" > /etc/yum.repos.d/CentOS-Linux-CRB.repo # Install system dependencies RUN dnf install -y --allowerasing \ From f3ebd9b502e2042fc5cf09abd8cfb623cfbfcbac Mon Sep 17 00:00:00 2001 From: Adam Ghandoura Date: Wed, 18 Feb 2026 17:22:05 +0100 Subject: [PATCH 6/7] rename last layer Signed-off-by: Adam Ghandoura --- .cd/Dockerfile.rhel.ubi.vllm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cd/Dockerfile.rhel.ubi.vllm b/.cd/Dockerfile.rhel.ubi.vllm index 8ee32a1b24..df3dab9e5a 100644 --- a/.cd/Dockerfile.rhel.ubi.vllm +++ b/.cd/Dockerfile.rhel.ubi.vllm @@ -167,7 +167,7 @@ WORKDIR /workspace # ============================================================================ # Stage 3: vllm-final - Install vLLM and configure runtime # ============================================================================ -FROM gaudi-pytorch +FROM gaudi-pytorch as vllm-openai # Re-declare global ARGs needed in this stage ARG VLLM_GAUDI_COMMIT From 8b7fbb617d8c84506a860d131b092f82969c220e Mon Sep 17 00:00:00 2001 From: Adam Ghandoura Date: Wed, 18 
Feb 2026 17:54:27 +0100 Subject: [PATCH 7/7] fix typo Signed-off-by: Adam Ghandoura --- .cd/Dockerfile.rhel.ubi.vllm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cd/Dockerfile.rhel.ubi.vllm b/.cd/Dockerfile.rhel.ubi.vllm index df3dab9e5a..af862eda7f 100644 --- a/.cd/Dockerfile.rhel.ubi.vllm +++ b/.cd/Dockerfile.rhel.ubi.vllm @@ -167,7 +167,7 @@ WORKDIR /workspace # ============================================================================ # Stage 3: vllm-final - Install vLLM and configure runtime # ============================================================================ -FROM gaudi-pytorch as vllm-openai +FROM gaudi-pytorch AS vllm-openai # Re-declare global ARGs needed in this stage ARG VLLM_GAUDI_COMMIT