vllm-project · Harry-Chen · May 25, 2026 · Apr 18, 2026 · Apr 18, 2026 · Apr 18, 2026
@@ -6,6 +6,58 @@ steps:
     timeout_in_minutes: 600
     commands:
     - if [[ "$BUILDKITE_BRANCH" == "main" ]]; then .buildkite/image_build/image_build.sh $REGISTRY $REPO $BUILDKITE_COMMIT $BRANCH $IMAGE_TAG $IMAGE_TAG_LATEST; else .buildkite/image_build/image_build.sh $REGISTRY $REPO $BUILDKITE_COMMIT $BRANCH $IMAGE_TAG; fi
+    # Non-root smoke 1: the default (root) image must still be importable
+    # under a non-root UID via `--user 2000:0`. Validates the `vllm` passwd
+    # entry + group-0-writable /home/vllm + uv path cleanup from #31959.
+    # Uses `import vllm` rather than `vllm serve --help` because the latter
+    # instantiates `VllmConfig` which requires a GPU attached to the
+    # container.
+    - docker run --rm --user 2000:0 --entrypoint python3 "$IMAGE_TAG" -c "import vllm; print(vllm.__version__)"
+    # Non-root smoke 2: assert the non-root enabling invariants are baked
+    # into the image. Runs as UID 2000:0 via a shell so we can verify
+    # filesystem perms + passwd/group file state + wrapper presence without
+    # triggering vLLM's GPU-requiring config-init path. The opt-in
+    # `vllm-openai-nonroot` target adds only `USER vllm`, `WORKDIR
+    # /home/vllm`, and an `ENTRYPOINT` override on top of these invariants;
+    # its build correctness is reviewed at the Dockerfile level. Wrapper
+    # logic is covered separately by the pre-commit hook
+    # `test-nonroot-entrypoint` (see .pre-commit-config.yaml).
+    - |
+      docker run --rm --user 2000:0 --entrypoint /bin/sh "$IMAGE_TAG" -ec '
+        if ! getent passwd 2000 | grep -q ^vllm:; then
+          echo FAIL: UID 2000 != vllm
+          exit 1
+        fi
+        if ! id -gn 2>/dev/null | grep -qx root; then
+          echo FAIL: GID 0 not root group
+          exit 1
+        fi
+        # Probe writability with explicit if-blocks: under sh -e a failing
+        # touch on the left of && would not abort the script.
+        if ! touch /home/vllm/.smoke; then
+          echo FAIL: /home/vllm not writable
+          exit 1
+        fi
+        rm /home/vllm/.smoke
+        if ! touch /opt/uv/cache/.smoke; then
+          echo FAIL: /opt/uv/cache not writable
+          exit 1
+        fi
+        rm /opt/uv/cache/.smoke
+        if ! test -x /usr/local/bin/vllm-nonroot-entrypoint.sh; then
+          echo FAIL: wrapper missing
+          exit 1
+        fi
+        if ! test -w /etc/passwd; then
+          echo FAIL: /etc/passwd not group-writable
+          exit 1
+        fi
+        if ! test -w /etc/group; then
+          echo FAIL: /etc/group not group-writable
+          exit 1
+        fi
+        echo non-root invariants OK
+      '
     retry:
       automatic:
         - exit_status: -1  # Agent was lost

@@ -222,6 +222,12 @@ repos:
     name: Update Dockerfile dependency graph
     entry: tools/pre_commit/update-dockerfile-graph.sh
     language: script
+  - id: test-nonroot-entrypoint
+    name: Test non-root entrypoint wrapper
+    entry: bash docker/entrypoints/test_vllm_nonroot_entrypoint.sh
+    language: system
+    pass_filenames: false
+    files: ^docker/entrypoints/(vllm-nonroot-entrypoint|test_vllm_nonroot_entrypoint)\.sh$
   - id: check-forbidden-imports
     name: Check for forbidden imports
     entry: python tools/pre_commit/check_forbidden_imports.py

@@ -105,6 +105,23 @@ ARG BUILD_OS
 
 ENV DEBIAN_FRONTEND=noninteractive
 
+# Environment for uv
+# Declared BEFORE the installer + `uv venv` invocations below so the uv
+# binary, managed Python, and download cache all land under /opt/uv
+# instead of /root/.local/. Without this, the venv created at build time
+# links back to /root/.local/share/uv/python and
+# descendants of this stage (`build`, `dev`, `csrc-build`,
+# `extensions-build`) inherit a root-owned, non-root-unreadable layout.
+# See #15174, #15359, #31959. Child stages inherit these via Dockerfile
+# `ENV` unless they override them explicitly.
+ENV UV_HTTP_TIMEOUT=500
+ENV UV_INDEX_STRATEGY="unsafe-best-match"
+ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
+ENV UV_CACHE_DIR=/opt/uv/cache
+ENV UV_INSTALL_DIR=/opt/uv/bin
+ENV PATH="/opt/venv/bin:/opt/uv/bin:$PATH"
+ENV VIRTUAL_ENV="/opt/venv"
+
 # Install system dependencies including build tools.
 # The Ubuntu path uses apt + deadsnakes-via-uv for Python; the manylinux path
 # (AlmaLinux 8, e.g. pytorch/manylinux2_28-builder) uses dnf and the Python
@@ -145,29 +162,32 @@ RUN if [ "${BUILD_OS}" = "manylinux" ]; then \
 
 # Install uv and bootstrap /opt/venv. Both paths converge on /opt/venv so all
 # downstream stages stay distro-agnostic.
-RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
+RUN mkdir -p "${UV_PYTHON_INSTALL_DIR}" "${UV_CACHE_DIR}" "${UV_INSTALL_DIR}" \
+    && chmod -R a+rX /opt/uv \
+    && curl -LsSf https://astral.sh/uv/install.sh | sh \
+    # `--seed` installs pip/setuptools/wheel into the venv so `python3 -m
+    # pip` works regardless of how uv happens to link the venv back to the
+    # managed Python install (which, at a non-default UV_PYTHON_INSTALL_DIR,
+    # doesn't always expose ensurepip via the default venv layout).
     && if [ "${BUILD_OS}" = "manylinux" ]; then \
            # manylinux images ship Python at /opt/python/cpXY-cpXY/; point uv
            # at the matching interpreter rather than letting it fetch one.
            PYV_NODOT=$(echo ${PYTHON_VERSION} | tr -d '.') \
            && MANYLINUX_PY=/opt/python/cp${PYV_NODOT}-cp${PYV_NODOT}/bin/python${PYTHON_VERSION} \
-           && $HOME/.local/bin/uv venv /opt/venv --python "$MANYLINUX_PY"; \
+           && uv venv --seed /opt/venv --python "$MANYLINUX_PY"; \
        else \
-           $HOME/.local/bin/uv venv /opt/venv --python ${PYTHON_VERSION}; \
+           uv venv --seed /opt/venv --python ${PYTHON_VERSION}; \
        fi \
     && rm -f /usr/bin/python3 /usr/bin/python3-config /usr/bin/pip \
     && ln -sf /opt/venv/bin/python3 /usr/bin/python3 \
     && ln -sf /opt/venv/bin/python3-config /usr/bin/python3-config \
     && ln -sf /opt/venv/bin/pip /usr/bin/pip \
     && python3 --version && python3 -m pip --version
 
-# Activate virtual environment and add uv to PATH
-ENV PATH="/opt/venv/bin:/root/.local/bin:$PATH"
-ENV VIRTUAL_ENV="/opt/venv"
-
-# Environment for uv
-ENV UV_HTTP_TIMEOUT=500
-ENV UV_INDEX_STRATEGY="unsafe-best-match"
+# UV_LINK_MODE=copy applies to subsequent `uv pip install` RUNs (avoids
+# hardlink failures with BuildKit cache mounts); it must not be set during
+# `uv venv` above, which relies on linking /opt/venv back to the
+# managed Python source so ensurepip / `python3 -m pip` still resolve.
 ENV UV_LINK_MODE=copy
 
 # Verify GCC version
@@ -198,7 +218,7 @@ COPY requirements/common.txt requirements/common.txt
 COPY requirements/cuda.txt requirements/cuda.txt
 COPY use_existing_torch.py use_existing_torch.py
 COPY pyproject.toml pyproject.toml
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     if [ "$(echo $CUDA_VERSION | cut -d. -f1)" = "12" ]; then \
         sed -i 's/^nvidia-cutlass-dsl\[cu13\]>=/nvidia-cutlass-dsl>=/' requirements/cuda.txt; \
     fi \
@@ -218,7 +238,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Track PyTorch lib versions used during build and match in downstream instances.
 # We do this for both nightly and release so we can strip dependencies/*.txt as needed.
 # Otherwise library dependencies can upgrade/downgrade torch incorrectly.
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     uv pip freeze | grep -i "^torch=\|^torchvision=\|^torchaudio=" > torch_lib_versions.txt \
     && TORCH_LIB_VERSIONS=$(cat torch_lib_versions.txt | xargs) \
     && echo "Installed torch libs: ${TORCH_LIB_VERSIONS}"
@@ -304,7 +324,7 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
 # Use copy mode to avoid hardlink failures with Docker cache mounts
 ENV UV_LINK_MODE=copy
 
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
         echo "Installing build requirements without torch..." \
         && python3 use_existing_torch.py --prefix \
@@ -349,7 +369,7 @@ ARG VLLM_MAIN_CUDA_VERSION=""
 ENV SETUPTOOLS_SCM_PRETEND_VERSION="0.0.0+csrc.build"
 
 # Use existing torch for nightly builds
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
         python3 use_existing_torch.py --prefix; \
     fi
@@ -365,7 +385,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Build the vLLM wheel
 # if USE_SCCACHE is set, use sccache to speed up compilation
 # AWS credentials mounted at ~/.aws/credentials for sccache S3 auth (optional)
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     --mount=type=secret,id=aws-credentials,target=/root/.aws/credentials,required=false \
     if [ "$USE_SCCACHE" = "1" ]; then \
         echo "Installing sccache..." \
@@ -399,7 +419,7 @@ ARG vllm_target_device="cuda"
 ENV VLLM_TARGET_DEVICE=${vllm_target_device}
 ENV CCACHE_DIR=/root/.cache/ccache
 RUN --mount=type=cache,target=/root/.cache/ccache \
-    --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=cache,target=/opt/uv/cache \
     if [ "$USE_SCCACHE" != "1" ]; then \
         # Clean any existing CMake artifacts
         rm -rf .deps && \
@@ -431,7 +451,7 @@ COPY tools/ep_kernels/install_python_libraries.sh /tmp/install_python_libraries.
 # Defaults moved here from tools/ep_kernels/install_python_libraries.sh for centralized version management
 ARG DEEPEP_COMMIT_HASH=73b6ea4
 ARG NVSHMEM_VER
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     mkdir -p /tmp/ep_kernels_workspace/dist && \
     export TORCH_CUDA_ARCH_LIST='9.0a 10.0a' && \
     /tmp/install_python_libraries.sh \
@@ -465,7 +485,7 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
 # Use copy mode to avoid hardlink failures with Docker cache mounts
 ENV UV_LINK_MODE=copy
 
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
         echo "Installing build requirements without torch..." \
         && python3 use_existing_torch.py --prefix \
@@ -500,13 +520,13 @@ ENV VLLM_TARGET_DEVICE=${vllm_target_device}
 ENV VLLM_SKIP_PRECOMPILED_VERSION_SUFFIX=1
 
 # Use existing torch for nightly builds
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
         python3 use_existing_torch.py --prefix; \
     fi
 
 # Build the vLLM wheel
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     --mount=type=bind,source=.git,target=.git \
     if [ "${vllm_target_device}" = "cuda" ]; then \
         export VLLM_USE_PRECOMPILED=1; \
@@ -564,7 +584,7 @@ COPY requirements/test/cuda.txt requirements/test/cuda.txt
 COPY requirements/dev.txt requirements/dev.txt
 COPY use_existing_torch.py use_existing_torch.py
 COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
         echo "Installing dev requirements plus torch nightly..." \
         && python3 use_existing_torch.py --prefix \
@@ -664,9 +684,50 @@ RUN CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-') && \
 RUN python3 -m pip install uv
 
 # Environment for uv
+# Redirect uv's managed Python and download cache out of /root/ so downstream
+# images (`FROM vllm/vllm-openai` + `USER <uid>`) and direct non-root runs
+# (`docker run --user <uid>:<gid>`) can read and execute them. See #15174,
+# #15359, #31959.
 ENV UV_HTTP_TIMEOUT=500
 ENV UV_INDEX_STRATEGY="unsafe-best-match"
 ENV UV_LINK_MODE=copy
+ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
+ENV UV_CACHE_DIR=/opt/uv/cache
+RUN mkdir -p "${UV_PYTHON_INSTALL_DIR}" "${UV_CACHE_DIR}" \
+    && chgrp -R 0 /opt/uv \
+    && chmod -R g+rwX,a+rX /opt/uv
+
+# ----------------------------------------------------------------------
+# Non-root support (opt-in)
+# ----------------------------------------------------------------------
+# Create a conventional `vllm` user (UID 2000, GID 0) so the image can be
+# run under `--user 2000:0` or the opt-in `vllm-openai-nonroot` target.
+#
+# Design notes:
+#   * GID 0 + group-writable cache dirs follow the OpenShift arbitrary-UID
+#     pattern, so any UID that is a member of group 0 at runtime can write
+#     to /home/vllm and /opt/uv without additional chown work.
+#   * The default `vllm-openai` image keeps `USER root`, so every existing
+#     `docker run vllm/vllm-openai ...` / K8s manifest / `FROM vllm/vllm-openai`
+#     + `RUN uv pip install --system ...` flow is unchanged.
+#   * The entrypoint wrapper below is only used by `vllm-openai-nonroot`; it
+#     handles the OpenShift arbitrary-UID case (UID not in /etc/passwd).
+# See #31959 and docs/deployment/docker.md.
+RUN useradd --uid 2000 --gid 0 --create-home --home-dir /home/vllm \
+        --shell /bin/bash vllm \
+    && mkdir -p /home/vllm/.cache /home/vllm/.config \
+    && chown -R 2000:0 /home/vllm \
+    && chmod -R g+rwX /home/vllm \
+    # Allow the entrypoint wrapper to append a /etc/passwd entry for an
+    # arbitrary runtime UID that shares GID 0. Without this, `whoami`, bash's
+    # `\u` prompt, `id -un`, and anything else that calls `getpwuid()`
+    # directly return "I have no name!" for OpenShift-style arbitrary UIDs.
+    # This matches the convention used by Red Hat UBI base images.
+    && chgrp 0 /etc/passwd /etc/group \
+    && chmod g=u /etc/passwd /etc/group
+COPY docker/entrypoints/vllm-nonroot-entrypoint.sh \
+    /usr/local/bin/vllm-nonroot-entrypoint.sh
+RUN chmod 0755 /usr/local/bin/vllm-nonroot-entrypoint.sh
 
 # Enable CUDA forward compatibility by setting '-e VLLM_ENABLE_CUDA_COMPATIBILITY=1'
 # Only needed for datacenter/professional GPUs with older drivers.
@@ -683,7 +744,7 @@ ENV VLLM_ENABLE_CUDA_COMPATIBILITY=0
 ARG PYTORCH_CUDA_INDEX_BASE_URL
 COPY requirements/common.txt /tmp/common.txt
 COPY requirements/cuda.txt /tmp/requirements-cuda.txt
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     if [ "$(echo $CUDA_VERSION | cut -d. -f1)" = "12" ]; then \
         sed -i 's/^nvidia-cutlass-dsl\[cu13\]>=/nvidia-cutlass-dsl>=/' /tmp/requirements-cuda.txt; \
     fi && \
@@ -695,7 +756,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # https://docs.flashinfer.ai/installation.html
 # From versions.json: .flashinfer.version
 ARG FLASHINFER_VERSION=0.6.11.post2
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     uv pip install --system flashinfer-jit-cache==${FLASHINFER_VERSION} \
         --extra-index-url https://flashinfer.ai/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
 
@@ -727,7 +788,7 @@ ARG BITSANDBYTES_VERSION_X86=0.46.1
 ARG BITSANDBYTES_VERSION_ARM64=0.42.0
 ARG TIMM_VERSION=">=1.0.17"
 ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.7"
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
         BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_ARM64}"; \
     else \
@@ -752,7 +813,7 @@ ARG PYTORCH_NIGHTLY
 # Check whether to install torch nightly instead of release for this build.
 COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
-    --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=cache,target=/opt/uv/cache \
     if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
         echo "Installing torch nightly..." \
         && uv pip install --system $(cat torch_lib_versions.txt | xargs) --pre \
@@ -766,7 +827,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
     fi
 
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
 . /etc/environment && \
 uv pip list
 
@@ -775,7 +836,7 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
 
 # Install EP kernels wheels (DeepEP) that have been built in the `build` stage
 RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \
-    --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=cache,target=/opt/uv/cache \
     uv pip install --system ep_kernels/dist/*.whl --verbose \
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
 
@@ -830,7 +891,7 @@ COPY requirements/test/cuda.txt requirements/test/cuda.txt
 COPY requirements/dev.txt requirements/dev.txt
 COPY use_existing_torch.py use_existing_torch.py
 COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
     if [ "$CUDA_MAJOR" -ge 12 ]; then \
         if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
@@ -850,7 +911,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     fi
 
 # install development dependencies (for testing)
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     uv pip install --system -e tests/vllm_test_utils
 
 # enable fast downloads from hf (for testing)
@@ -890,7 +951,7 @@ ENV UV_HTTP_TIMEOUT=500
 # install kv_connectors if requested
 ARG torch_cuda_arch_list='7.5 8.0 8.6 8.9 9.0 10.0 11.0 12.0+PTX'
 ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     --mount=type=bind,source=requirements/kv_connectors.txt,target=/tmp/kv_connectors.txt,ro \
     CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
     CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-'); \
@@ -958,5 +1019,32 @@ ENTRYPOINT ["./sagemaker-entrypoint.sh"]
 
 FROM vllm-openai-base AS vllm-openai
 
+# To run the image as non-root, either build the `vllm-openai-nonroot` target
+# below, or add `USER vllm` in a derived Dockerfile (or uncomment the line
+# below) and chgrp-0 / chmod-g+rwX any paths your extra layers write to. The
+# `vllm` user (UID 2000, GID 0) is already created in the `vllm-base` stage.
+# See docs/deployment/docker.md.
+# USER vllm
 ENTRYPOINT ["vllm", "serve"]
 #################### OPENAI API SERVER ####################
+
+#################### OPENAI API SERVER (NON-ROOT, OPT-IN) ####################
+# Non-root-ready variant of `vllm-openai`. Built via:
+#   docker build --target vllm-openai-nonroot -t vllm:openai-nonroot \
+#       -f docker/Dockerfile .
+#
+# Runtime behavior:
+#   * Default USER is `vllm` (UID 2000, GID 0) created in `vllm-base`.
+#   * HOME is /home/vllm, pre-created group-0-writable so arbitrary UIDs in
+#     group 0 (OpenShift / `--user <uid>:0`) can also use the image.
+#   * Entrypoint wrapper handles the "UID not in /etc/passwd" case for truly
+#     arbitrary UIDs by falling back HOME/USER to sane writable defaults.
+#   * All cache/config envs (HF_HOME, VLLM_CACHE_ROOT, TRITON_CACHE_DIR, ...)
+#     remain unset so their library defaults resolve to $HOME/.cache/... ,
+#     which is writable.
+FROM vllm-openai AS vllm-openai-nonroot
+
+USER vllm
+WORKDIR /home/vllm
+ENTRYPOINT ["/usr/local/bin/vllm-nonroot-entrypoint.sh"]
+#################### OPENAI API SERVER (NON-ROOT, OPT-IN) ####################