Skip to content
Merged
42 changes: 42 additions & 0 deletions .buildkite/image_build/image_build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,48 @@ steps:
timeout_in_minutes: 600
commands:
- if [[ "$BUILDKITE_BRANCH" == "main" ]]; then .buildkite/image_build/image_build.sh $REGISTRY $REPO $BUILDKITE_COMMIT $BRANCH $IMAGE_TAG $IMAGE_TAG_LATEST; else .buildkite/image_build/image_build.sh $REGISTRY $REPO $BUILDKITE_COMMIT $BRANCH $IMAGE_TAG; fi
# Non-root smoke 1: the default (root) image must still be importable
# under a non-root UID via `--user 2000:0`. Validates the `vllm` passwd
# entry + group-0-writable /home/vllm + uv path cleanup from #31959.
# Uses `import vllm` rather than `vllm serve --help` because the latter
# instantiates `VllmConfig` which requires a GPU attached to the
# container.
- docker run --rm --user 2000:0 --entrypoint python3 "$IMAGE_TAG" -c "import vllm; print(vllm.__version__)"
# Non-root smoke 2: assert the non-root enabling invariants are baked
# into the image. Runs as UID 2000:0 via a shell so we can verify
# filesystem perms + passwd/group file state + wrapper presence without
# triggering vLLM's GPU-requiring config-init path. The opt-in
# `vllm-openai-nonroot` target adds only `USER vllm`, `WORKDIR
# /home/vllm`, and an `ENTRYPOINT` override on top of these invariants;
# its build correctness is reviewed at the Dockerfile level. Wrapper
# logic is covered separately by the pre-commit hook
# `test-nonroot-entrypoint` (see .pre-commit-config.yaml).
- |
  docker run --rm --user 2000:0 --entrypoint /bin/sh "$IMAGE_TAG" -ec '
    # Every invariant below is guarded by an explicit if/exit so that a
    # violation always prints a FAIL line and aborts the step. Do not rely
    # on sh -e for "a && b" probes: errexit is ignored for every command
    # of an AND-OR list except the last one, so a failed touch would be
    # silently skipped and the script would still report success.

    # UID 2000 must resolve to the vllm passwd entry.
    if ! getent passwd 2000 | grep -q ^vllm:; then
      echo FAIL: UID 2000 != vllm
      exit 1
    fi

    # The effective group must be the root group (GID 0).
    if ! id -gn 2>/dev/null | grep -qx root; then
      echo FAIL: GID 0 not root group
      exit 1
    fi

    # The home directory must be writable by this UID/GID.
    if ! { touch /home/vllm/.smoke && rm /home/vllm/.smoke; }; then
      echo FAIL: /home/vllm not writable
      exit 1
    fi

    # The uv cache directory must be writable by this UID/GID.
    if ! { touch /opt/uv/cache/.smoke && rm /opt/uv/cache/.smoke; }; then
      echo FAIL: /opt/uv/cache not writable
      exit 1
    fi

    # The entrypoint wrapper must be present and executable.
    if ! test -x /usr/local/bin/vllm-nonroot-entrypoint.sh; then
      echo FAIL: wrapper missing
      exit 1
    fi

    # The account databases must be writable through group 0.
    if ! test -w /etc/passwd; then
      echo FAIL: /etc/passwd not group-writable
      exit 1
    fi
    if ! test -w /etc/group; then
      echo FAIL: /etc/group not group-writable
      exit 1
    fi

    echo non-root invariants OK
  '
retry:
automatic:
- exit_status: -1 # Agent was lost
Expand Down
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,12 @@ repos:
name: Update Dockerfile dependency graph
entry: tools/pre_commit/update-dockerfile-graph.sh
language: script
# Runs the test script for the non-root entrypoint wrapper through bash.
# `files` limits the hook to changes in the wrapper or its test script, and
# `pass_filenames: false` means the script is invoked without the changed
# file names appended as arguments.
- id: test-nonroot-entrypoint
name: Test non-root entrypoint wrapper
entry: bash docker/entrypoints/test_vllm_nonroot_entrypoint.sh
language: system
pass_filenames: false
files: ^docker/entrypoints/(vllm-nonroot-entrypoint|test_vllm_nonroot_entrypoint)\.sh$
- id: check-forbidden-imports
name: Check for forbidden imports
entry: python tools/pre_commit/check_forbidden_imports.py
Expand Down
148 changes: 118 additions & 30 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,23 @@ ARG BUILD_OS

ENV DEBIAN_FRONTEND=noninteractive

# Environment for uv
# Declared BEFORE the installer + `uv venv` invocations below so the uv
# binary, managed Python, download cache, and /opt/venv all land under
# /opt/uv instead of /root/.local/. Without this, the venv created at
# build time hardlinks back to /root/.local/share/uv/python and
# descendants of this stage (`build`, `dev`, `csrc-build`,
# `extensions-build`) inherit a root-owned, non-root-unreadable layout.
# See #15174, #15359, #31959. Child stages inherit these via Dockerfile
# `ENV` unless they override them explicitly.
ENV UV_HTTP_TIMEOUT=500
ENV UV_INDEX_STRATEGY="unsafe-best-match"
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
ENV UV_CACHE_DIR=/opt/uv/cache
ENV UV_INSTALL_DIR=/opt/uv/bin
ENV PATH="/opt/venv/bin:/opt/uv/bin:$PATH"
ENV VIRTUAL_ENV="/opt/venv"

# Install system dependencies including build tools.
# The Ubuntu path uses apt + deadsnakes-via-uv for Python; the manylinux path
# (AlmaLinux 8, e.g. pytorch/manylinux2_28-builder) uses dnf and the Python
Expand Down Expand Up @@ -145,29 +162,32 @@ RUN if [ "${BUILD_OS}" = "manylinux" ]; then \

# Install uv and bootstrap /opt/venv. Both paths converge on /opt/venv so all
# downstream stages stay distro-agnostic.
RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
RUN mkdir -p "${UV_PYTHON_INSTALL_DIR}" "${UV_CACHE_DIR}" "${UV_INSTALL_DIR}" \
&& chmod -R a+rX /opt/uv \
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
# `--seed` installs pip/setuptools/wheel into the venv so `python3 -m
# pip` works regardless of how uv happens to link the venv back to the
# managed Python install (which, at a non-default UV_PYTHON_INSTALL_DIR,
# doesn't always expose ensurepip via the default venv layout).
&& if [ "${BUILD_OS}" = "manylinux" ]; then \
# manylinux images ship Python at /opt/python/cpXY-cpXY/; point uv
# at the matching interpreter rather than letting it fetch one.
PYV_NODOT=$(echo ${PYTHON_VERSION} | tr -d '.') \
&& MANYLINUX_PY=/opt/python/cp${PYV_NODOT}-cp${PYV_NODOT}/bin/python${PYTHON_VERSION} \
&& $HOME/.local/bin/uv venv /opt/venv --python "$MANYLINUX_PY"; \
&& uv venv --seed /opt/venv --python "$MANYLINUX_PY"; \
else \
$HOME/.local/bin/uv venv /opt/venv --python ${PYTHON_VERSION}; \
uv venv --seed /opt/venv --python ${PYTHON_VERSION}; \
fi \
&& rm -f /usr/bin/python3 /usr/bin/python3-config /usr/bin/pip \
&& ln -sf /opt/venv/bin/python3 /usr/bin/python3 \
&& ln -sf /opt/venv/bin/python3-config /usr/bin/python3-config \
&& ln -sf /opt/venv/bin/pip /usr/bin/pip \
&& python3 --version && python3 -m pip --version

# Activate virtual environment and add uv to PATH
ENV PATH="/opt/venv/bin:/root/.local/bin:$PATH"
ENV VIRTUAL_ENV="/opt/venv"

# Environment for uv
ENV UV_HTTP_TIMEOUT=500
ENV UV_INDEX_STRATEGY="unsafe-best-match"
# UV_LINK_MODE=copy applies to subsequent `uv pip install` RUNs (avoids
# hardlink failures with BuildKit cache mounts); it must not be set during
# `uv venv` above, which relies on hardlinking /opt/venv back to the
# managed Python source so ensurepip / `python3 -m pip` still resolve.
ENV UV_LINK_MODE=copy

# Verify GCC version
Expand Down Expand Up @@ -198,7 +218,7 @@ COPY requirements/common.txt requirements/common.txt
COPY requirements/cuda.txt requirements/cuda.txt
COPY use_existing_torch.py use_existing_torch.py
COPY pyproject.toml pyproject.toml
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
if [ "$(echo $CUDA_VERSION | cut -d. -f1)" = "12" ]; then \
sed -i 's/^nvidia-cutlass-dsl\[cu13\]>=/nvidia-cutlass-dsl>=/' requirements/cuda.txt; \
fi \
Expand All @@ -218,7 +238,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# Track PyTorch lib versions used during build and match in downstream instances.
# We do this for both nightly and release so we can strip dependencies/*.txt as needed.
# Otherwise library dependencies can upgrade/downgrade torch incorrectly.
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
uv pip freeze | grep -i "^torch=\|^torchvision=\|^torchaudio=" > torch_lib_versions.txt \
&& TORCH_LIB_VERSIONS=$(cat torch_lib_versions.txt | xargs) \
&& echo "Installed torch libs: ${TORCH_LIB_VERSIONS}"
Expand Down Expand Up @@ -304,7 +324,7 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
# Use copy mode to avoid hardlink failures with Docker cache mounts
ENV UV_LINK_MODE=copy

RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
echo "Installing build requirements without torch..." \
&& python3 use_existing_torch.py --prefix \
Expand Down Expand Up @@ -349,7 +369,7 @@ ARG VLLM_MAIN_CUDA_VERSION=""
ENV SETUPTOOLS_SCM_PRETEND_VERSION="0.0.0+csrc.build"

# Use existing torch for nightly builds
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
python3 use_existing_torch.py --prefix; \
fi
Expand All @@ -365,7 +385,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# Build the vLLM wheel
# if USE_SCCACHE is set, use sccache to speed up compilation
# AWS credentials mounted at ~/.aws/credentials for sccache S3 auth (optional)
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
--mount=type=secret,id=aws-credentials,target=/root/.aws/credentials,required=false \
if [ "$USE_SCCACHE" = "1" ]; then \
echo "Installing sccache..." \
Expand Down Expand Up @@ -399,7 +419,7 @@ ARG vllm_target_device="cuda"
ENV VLLM_TARGET_DEVICE=${vllm_target_device}
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
--mount=type=cache,target=/root/.cache/uv \
--mount=type=cache,target=/opt/uv/cache \
if [ "$USE_SCCACHE" != "1" ]; then \
# Clean any existing CMake artifacts
rm -rf .deps && \
Expand Down Expand Up @@ -431,7 +451,7 @@ COPY tools/ep_kernels/install_python_libraries.sh /tmp/install_python_libraries.
# Defaults moved here from tools/ep_kernels/install_python_libraries.sh for centralized version management
ARG DEEPEP_COMMIT_HASH=73b6ea4
ARG NVSHMEM_VER
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
mkdir -p /tmp/ep_kernels_workspace/dist && \
export TORCH_CUDA_ARCH_LIST='9.0a 10.0a' && \
/tmp/install_python_libraries.sh \
Expand Down Expand Up @@ -465,7 +485,7 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
# Use copy mode to avoid hardlink failures with Docker cache mounts
ENV UV_LINK_MODE=copy

RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
echo "Installing build requirements without torch..." \
&& python3 use_existing_torch.py --prefix \
Expand Down Expand Up @@ -500,13 +520,13 @@ ENV VLLM_TARGET_DEVICE=${vllm_target_device}
ENV VLLM_SKIP_PRECOMPILED_VERSION_SUFFIX=1

# Use existing torch for nightly builds
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
python3 use_existing_torch.py --prefix; \
fi

# Build the vLLM wheel
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
--mount=type=bind,source=.git,target=.git \
if [ "${vllm_target_device}" = "cuda" ]; then \
export VLLM_USE_PRECOMPILED=1; \
Expand Down Expand Up @@ -564,7 +584,7 @@ COPY requirements/test/cuda.txt requirements/test/cuda.txt
COPY requirements/dev.txt requirements/dev.txt
COPY use_existing_torch.py use_existing_torch.py
COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
echo "Installing dev requirements plus torch nightly..." \
&& python3 use_existing_torch.py --prefix \
Expand Down Expand Up @@ -664,9 +684,50 @@ RUN CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-') && \
RUN python3 -m pip install uv

# Environment for uv
# Redirect uv's managed Python and download cache out of /root/ so downstream
# images (`FROM vllm/vllm-openai` + `USER <uid>`) and direct non-root runs
# (`docker run --user <uid>:<gid>`) can read and execute them. See #15174,
# #15359, #31959.
ENV UV_HTTP_TIMEOUT=500
ENV UV_INDEX_STRATEGY="unsafe-best-match"
ENV UV_LINK_MODE=copy
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
ENV UV_CACHE_DIR=/opt/uv/cache
# Pre-create the uv Python-install and cache directories, then open the whole
# /opt/uv tree: read/traverse for everyone, read/write/traverse for its group,
# and hand the tree to group 0.
RUN mkdir -p "${UV_PYTHON_INSTALL_DIR}" "${UV_CACHE_DIR}" \
    && chmod -R g+rwX,a+rX /opt/uv \
    && chgrp -R 0 /opt/uv

# ----------------------------------------------------------------------
# Non-root support (opt-in)
# ----------------------------------------------------------------------
# Create a conventional `vllm` user (UID 2000, GID 0) so the image can be
# run under `--user 2000:0` or the opt-in `vllm-openai-nonroot` target.
#
# Design notes:
# * GID 0 + group-writable cache dirs follow the OpenShift arbitrary-UID
# pattern, so any UID that is a member of group 0 at runtime can write
# to /home/vllm and /opt/uv without additional chown work.
# * The default `vllm-openai` image keeps `USER root`, so every existing
# `docker run vllm/vllm-openai ...` / K8s manifest / `FROM vllm/vllm-openai`
# + `RUN uv pip install --system ...` flow is unchanged.
# * The entrypoint wrapper below is only used by `vllm-openai-nonroot`; it
# handles the OpenShift arbitrary-UID case (UID not in /etc/passwd).
# See #31959 and docs/deployment/docker.md.
# Create the `vllm` account (UID 2000, primary GID 0) with its home at
# /home/vllm, pre-create ~/.cache and ~/.config, and make the whole home tree
# owned by 2000:0 and group-writable. Finally hand /etc/passwd and /etc/group
# to group 0 and copy the owner permission bits to the group (`g=u`).
# NOTE(review): group-writable /etc/passwd and /etc/group let any process
# running with GID 0 edit the account database. This RUN lives in the shared
# stage, so the default (root) image inherits it too; confirm that trade-off
# is accepted and not only intended for the opt-in non-root target.
RUN useradd --uid 2000 --gid 0 --create-home --home-dir /home/vllm \
--shell /bin/bash vllm \
&& mkdir -p /home/vllm/.cache /home/vllm/.config \
&& chown -R 2000:0 /home/vllm \
&& chmod -R g+rwX /home/vllm \
# Allow the entrypoint wrapper to append a /etc/passwd entry for an
# arbitrary runtime UID that shares GID 0. Without this, `whoami`, bash's
# `\u` prompt, `id -un`, and anything else that calls `getpwuid()`
# directly return "I have no name!" for OpenShift-style arbitrary UIDs.
# This matches the convention used by Red Hat UBI base images.
&& chgrp 0 /etc/passwd /etc/group \
&& chmod g=u /etc/passwd /etc/group
# Install the non-root entrypoint wrapper with mode 0755 in a single layer.
# `COPY --chmod` sets the permissions while copying, so no follow-up
# `RUN chmod` layer is needed. It requires BuildKit, which this Dockerfile
# already depends on for its `RUN --mount=type=cache` instructions.
COPY --chmod=0755 docker/entrypoints/vllm-nonroot-entrypoint.sh \
    /usr/local/bin/vllm-nonroot-entrypoint.sh

# Enable CUDA forward compatibility by setting '-e VLLM_ENABLE_CUDA_COMPATIBILITY=1'
# Only needed for datacenter/professional GPUs with older drivers.
Expand All @@ -683,7 +744,7 @@ ENV VLLM_ENABLE_CUDA_COMPATIBILITY=0
ARG PYTORCH_CUDA_INDEX_BASE_URL
COPY requirements/common.txt /tmp/common.txt
COPY requirements/cuda.txt /tmp/requirements-cuda.txt
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
if [ "$(echo $CUDA_VERSION | cut -d. -f1)" = "12" ]; then \
sed -i 's/^nvidia-cutlass-dsl\[cu13\]>=/nvidia-cutlass-dsl>=/' /tmp/requirements-cuda.txt; \
fi && \
Expand All @@ -695,7 +756,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# https://docs.flashinfer.ai/installation.html
# From versions.json: .flashinfer.version
ARG FLASHINFER_VERSION=0.6.11.post2
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
uv pip install --system flashinfer-jit-cache==${FLASHINFER_VERSION} \
--extra-index-url https://flashinfer.ai/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')

Expand Down Expand Up @@ -727,7 +788,7 @@ ARG BITSANDBYTES_VERSION_X86=0.46.1
ARG BITSANDBYTES_VERSION_ARM64=0.42.0
ARG TIMM_VERSION=">=1.0.17"
ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.7"
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_ARM64}"; \
else \
Expand All @@ -752,7 +813,7 @@ ARG PYTORCH_NIGHTLY
# Check whether to install torch nightly instead of release for this build.
COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
--mount=type=cache,target=/root/.cache/uv \
--mount=type=cache,target=/opt/uv/cache \
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
echo "Installing torch nightly..." \
&& uv pip install --system $(cat torch_lib_versions.txt | xargs) --pre \
Expand All @@ -766,7 +827,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
fi

RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
. /etc/environment && \
uv pip list

Expand All @@ -775,7 +836,7 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH

# Install EP kernels wheels (DeepEP) that have been built in the `build` stage
RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \
--mount=type=cache,target=/root/.cache/uv \
--mount=type=cache,target=/opt/uv/cache \
uv pip install --system ep_kernels/dist/*.whl --verbose \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')

Expand Down Expand Up @@ -830,7 +891,7 @@ COPY requirements/test/cuda.txt requirements/test/cuda.txt
COPY requirements/dev.txt requirements/dev.txt
COPY use_existing_torch.py use_existing_torch.py
COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
if [ "$CUDA_MAJOR" -ge 12 ]; then \
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
Expand All @@ -850,7 +911,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
fi

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
uv pip install --system -e tests/vllm_test_utils

# enable fast downloads from hf (for testing)
Expand Down Expand Up @@ -890,7 +951,7 @@ ENV UV_HTTP_TIMEOUT=500
# install kv_connectors if requested
ARG torch_cuda_arch_list='7.5 8.0 8.6 8.9 9.0 10.0 11.0 12.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/opt/uv/cache \
--mount=type=bind,source=requirements/kv_connectors.txt,target=/tmp/kv_connectors.txt,ro \
CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-'); \
Expand Down Expand Up @@ -958,5 +1019,32 @@ ENTRYPOINT ["./sagemaker-entrypoint.sh"]

FROM vllm-openai-base AS vllm-openai

# To run the image as non-root, either build the `vllm-openai-nonroot` target
# below, or add `USER vllm` in a derived Dockerfile (the equivalent of
# uncommenting the line below) and ensure any additional layers chgrp-0 /
# chmod-g+rwX the paths they write to. The `vllm` user (UID 2000, GID 0) is
# already created in the `vllm-base` stage. See docs/deployment/docker.md.
# USER vllm
ENTRYPOINT ["vllm", "serve"]
#################### OPENAI API SERVER ####################

#################### OPENAI API SERVER (NON-ROOT, OPT-IN) ####################
# Non-root-ready variant of `vllm-openai`. Built via:
# docker build --target vllm-openai-nonroot -t vllm:openai-nonroot \
# -f docker/Dockerfile .
#
# Runtime behavior:
# * Default USER is `vllm` (UID 2000, GID 0) created in `vllm-base`.
# * HOME is /home/vllm, pre-created group-0-writable so arbitrary UIDs in
# group 0 (OpenShift / `--user <uid>:0`) can also use the image.
# * Entrypoint wrapper handles the "UID not in /etc/passwd" case for truly
# arbitrary UIDs by falling back HOME/USER to sane writable defaults.
# * All cache/config envs (HF_HOME, VLLM_CACHE_ROOT, TRITON_CACHE_DIR, ...)
# remain unset so their library defaults resolve to $HOME/.cache/... ,
# which is writable.
# Opt-in stage layered directly on `vllm-openai`; it only changes the default
# user, working directory, and entrypoint.
FROM vllm-openai AS vllm-openai-nonroot

# Run as the `vllm` account from here on instead of root.
USER vllm
# Start in the account's home directory.
WORKDIR /home/vllm
# Replaces the `["vllm", "serve"]` entrypoint inherited from `vllm-openai`
# with the wrapper script.
# NOTE(review): the wrapper's contents are not visible here; confirm it still
# ends by exec-ing the server so arguments and signals reach it.
ENTRYPOINT ["/usr/local/bin/vllm-nonroot-entrypoint.sh"]
#################### OPENAI API SERVER (NON-ROOT, OPT-IN) ####################
Loading
Loading