From c1f80016438d36602da3631a26a900232fb0f1f8 Mon Sep 17 00:00:00 2001 From: tzhouam Date: Fri, 20 Mar 2026 06:27:11 +0000 Subject: [PATCH 1/2] [Update] Bump vLLM version to 0.18.0 across Dockerfiles and documentation - Updated VLLM_VERSION in pipeline-intel.yaml and Dockerfiles for CI, ROCm, and XPU to 0.18.0. - Modified installation instructions in quickstart.md, gpu.md, cuda.inc.md, and rocm.inc.md to reflect the new version. - Adjusted pre-built wheel availability note in gpu.md to include version 0.18.0. - Updated Docker image tags in documentation to use v0.18.0. Signed-off-by: tzhouam --- .buildkite/pipeline-intel.yaml | 2 +- docker/Dockerfile.ci | 2 +- docker/Dockerfile.rocm | 4 ++-- docker/Dockerfile.xpu | 2 +- docs/getting_started/installation/gpu.md | 2 +- docs/getting_started/installation/gpu/cuda.inc.md | 12 ++++++------ docs/getting_started/installation/gpu/rocm.inc.md | 14 +++++++------- docs/getting_started/quickstart.md | 4 ++-- 8 files changed, 21 insertions(+), 21 deletions(-) diff --git a/.buildkite/pipeline-intel.yaml b/.buildkite/pipeline-intel.yaml index bd9c9daabec..4334dd516b4 100644 --- a/.buildkite/pipeline-intel.yaml +++ b/.buildkite/pipeline-intel.yaml @@ -10,7 +10,7 @@ steps: DOCKER_BUILDKIT: "1" # Buildkite will automatically replace this with the actual commit hash VLLM_IMAGE_TAG: "${BUILDKITE_COMMIT}" - VLLM_VERSION: "v0.17.0" + VLLM_VERSION: "v0.18.0" priority: 100 timeout_in_minutes: 60 soft_fail: true diff --git a/docker/Dockerfile.ci b/docker/Dockerfile.ci index cb80828eb95..f4253fe7255 100644 --- a/docker/Dockerfile.ci +++ b/docker/Dockerfile.ci @@ -1,5 +1,5 @@ ARG VLLM_BASE_IMAGE=vllm/vllm-openai -ARG VLLM_BASE_TAG=v0.17.0 +ARG VLLM_BASE_TAG=v0.18.0 FROM ${VLLM_BASE_IMAGE}:${VLLM_BASE_TAG} ARG APP_DIR=/workspace/vllm-omni WORKDIR ${APP_DIR} diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index 3e0b6065059..ce541497a34 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -1,4 +1,4 @@ -ARG 
BASE_IMAGE=vllm/vllm-openai-rocm:v0.17.0 +ARG BASE_IMAGE=vllm/vllm-openai-rocm:v0.18.0 FROM ${BASE_IMAGE} AS base # Declare a variable to know if we want to use the nightly build or the stable build. @@ -10,7 +10,7 @@ FROM ${BASE_IMAGE} AS base # we should swap over to use stable release ASAP. # We should avoid relying on custom commits. ARG USE_NIGHTLY_BUILD=0 -ARG VLLM_VERSION_OR_COMMIT_HASH=2d5be1dd5ce2e44dfea53ea03ff61143da5137eb +ARG VLLM_VERSION_OR_COMMIT_HASH=89138b21cc246ae944c741d5c399c148e2b770ab ARG ARG_PYTORCH_ROCM_ARCH ENV PYTORCH_ROCM_ARCH=${ARG_PYTORCH_ROCM_ARCH:-${PYTORCH_ROCM_ARCH}} diff --git a/docker/Dockerfile.xpu b/docker/Dockerfile.xpu index 02f57265fe0..8901725f06c 100644 --- a/docker/Dockerfile.xpu +++ b/docker/Dockerfile.xpu @@ -76,7 +76,7 @@ ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} ENV UV_INDEX_STRATEGY="unsafe-best-match" ENV UV_LINK_MODE="copy" -ARG VLLM_VERSION=v0.17.0 +ARG VLLM_VERSION=v0.18.0 RUN git clone -b ${VLLM_VERSION} https://github.com/vllm-project/vllm WORKDIR /workspace/vllm diff --git a/docs/getting_started/installation/gpu.md b/docs/getting_started/installation/gpu.md index 508ea307da8..297c3666169 100644 --- a/docs/getting_started/installation/gpu.md +++ b/docs/getting_started/installation/gpu.md @@ -30,7 +30,7 @@ vLLM-Omni is a Python library that supports the following GPU variants. The libr ### Pre-built wheels -Note: Pre-built wheels are currently only available for vLLM-Omni 0.11.0rc1, 0.12.0rc1, 0.14.0rc1, 0.14.0, 0.16.0. For the latest version, please [build from source](https://docs.vllm.ai/projects/vllm-omni/en/latest/getting_started/installation/gpu/#build-wheel-from-source). +Note: Pre-built wheels are currently available for vLLM-Omni 0.11.0rc1, 0.12.0rc1, 0.14.0rc1, 0.14.0, 0.16.0, and 0.18.0. If you need a newer unreleased revision, please [build from source](https://docs.vllm.ai/projects/vllm-omni/en/latest/getting_started/installation/gpu/#build-wheel-from-source). 
=== "NVIDIA CUDA" diff --git a/docs/getting_started/installation/gpu/cuda.inc.md b/docs/getting_started/installation/gpu/cuda.inc.md index d8585ad685e..45fa69ddbb5 100644 --- a/docs/getting_started/installation/gpu/cuda.inc.md +++ b/docs/getting_started/installation/gpu/cuda.inc.md @@ -34,13 +34,13 @@ uv pip install vllm-omni # --8<-- [start:build-wheel-from-source] #### Installation of vLLM -If you do not need to modify source code of vLLM, you can directly install the stable 0.17.0 release version of the library +If you do not need to modify source code of vLLM, you can directly install the stable 0.18.0 release version of the library ```bash -uv pip install vllm==0.17.0 --torch-backend=auto +uv pip install vllm==0.18.0 --torch-backend=auto ``` -The release 0.17.0 of vLLM is based on PyTorch 2.10.0 which requires CUDA 12.9 environment. +The 0.18.0 release of vLLM ships CUDA 12.9-compatible binaries by default. If you need a different CUDA variant or want to reuse an existing PyTorch installation, build vLLM from source instead. #### Installation of vLLM-Omni Since vllm-omni is rapidly evolving, it's recommended to install it from source @@ -56,11 +56,11 @@ If you want to check, modify or debug with source code of vLLM, install the libr ```bash git clone https://github.com/vllm-project/vllm.git cd vllm -git checkout v0.17.0 +git checkout v0.18.0 ``` Set up environment variables to get pre-built wheels. If there are internet problems, just download the whl file manually. And set `VLLM_PRECOMPILED_WHEEL_LOCATION` as your local absolute path of whl file. ```bash -export VLLM_PRECOMPILED_WHEEL_LOCATION=https://github.com/vllm-project/vllm/releases/download/v0.17.0/vllm-0.17.0-cp38-abi3-manylinux_2_31_x86_64.whl +export VLLM_PRECOMPILED_WHEEL_LOCATION=https://github.com/vllm-project/vllm/releases/download/v0.18.0/vllm-0.18.0+cu129-cp38-abi3-manylinux_2_35_x86_64.whl ``` Install vllm with command below (If you have no existing PyTorch). 
```bash @@ -91,7 +91,7 @@ docker run --runtime nvidia --gpus 2 \ --env "HF_TOKEN=$HF_TOKEN" \ -p 8091:8091 \ --ipc=host \ - vllm/vllm-omni:v0.16.0 \ + vllm/vllm-omni:v0.18.0 \ --model Qwen/Qwen3-Omni-30B-A3B-Instruct --port 8091 ``` diff --git a/docs/getting_started/installation/gpu/rocm.inc.md b/docs/getting_started/installation/gpu/rocm.inc.md index 701741cd091..da84561c966 100644 --- a/docs/getting_started/installation/gpu/rocm.inc.md +++ b/docs/getting_started/installation/gpu/rocm.inc.md @@ -13,7 +13,7 @@ vLLM-Omni current recommends the steps in under setup through Docker Images. vLLM-Omni is built based on vLLM. Please install it with command below. ```bash -uv pip install vllm==0.17.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.17.0/rocm700 +uv pip install vllm==0.18.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.18.0/rocm700 ``` #### Installation of vLLM-Omni @@ -34,13 +34,13 @@ uv pip install onnxruntime-rocm sox # --8<-- [start:build-wheel-from-source] #### Installation of vLLM -If you do not need to modify source code of vLLM, you can directly install the stable 0.17.0 release version of the library +If you do not need to modify source code of vLLM, you can directly install the stable 0.18.0 release version of the library ```bash -uv pip install vllm==0.17.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.17.0/rocm700 +uv pip install vllm==0.18.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.18.0/rocm700 ``` -The release 0.17.0 of vLLM requires ROCm 7.0 environment. +The pre-built 0.18.0 vLLM wheel targets ROCm 7.0. If you need a different ROCm stack or want to reuse an existing PyTorch installation, build vLLM from source instead. 
#### Installation of vLLM-Omni Since vllm-omni is rapidly evolving, it's recommended to install it from source @@ -58,7 +58,7 @@ If you want to check, modify or debug with source code of vLLM, install the libr ```bash git clone https://github.com/vllm-project/vllm.git cd vllm -git checkout v0.17.0 +git checkout v0.18.0 python3 -m pip install -r requirements/rocm.txt python3 setup.py develop ``` @@ -130,7 +130,7 @@ docker run --rm \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=$HF_TOKEN" \ -p 8091:8091 \ - vllm/vllm-omni-rocm:v0.16.0 \ + vllm/vllm-omni-rocm:v0.18.0 \ --model Qwen/Qwen3-Omni-30B-A3B-Instruct --omni --port 8091 ``` @@ -149,7 +149,7 @@ docker run --rm -it \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=$HF_TOKEN" \ --entrypoint bash \ - vllm/vllm-omni-rocm:v0.16.0 + vllm/vllm-omni-rocm:v0.18.0 ``` # --8<-- [end:pre-built-images] diff --git a/docs/getting_started/quickstart.md b/docs/getting_started/quickstart.md index 9b1182c6b3c..bf0978216bf 100644 --- a/docs/getting_started/quickstart.md +++ b/docs/getting_started/quickstart.md @@ -19,10 +19,10 @@ uv venv --python 3.12 --seed source .venv/bin/activate # On CUDA -uv pip install vllm==0.17.0 --torch-backend=auto +uv pip install vllm==0.18.0 --torch-backend=auto # On ROCm -uv pip install vllm==0.17.0 --extra-index-url https://wheels.vllm.ai/rocm/0.17.0/rocm700 +uv pip install vllm==0.18.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.18.0/rocm700 git clone https://github.com/vllm-project/vllm-omni.git cd vllm-omni From ca162b50d04ddbc5caae66b64e380148c7612bb4 Mon Sep 17 00:00:00 2001 From: Gao Han Date: Sat, 21 Mar 2026 20:52:03 +0800 Subject: [PATCH 2/2] Refactor Dockerfile by removing vLLM installation Removed installation of vLLM and related dependencies from Dockerfile. 
Signed-off-by: Gao Han --- docker/Dockerfile.ci | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/docker/Dockerfile.ci b/docker/Dockerfile.ci index 2289d3fbd43..f4253fe7255 100644 --- a/docker/Dockerfile.ci +++ b/docker/Dockerfile.ci @@ -11,29 +11,10 @@ RUN apt-get update && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN uv pip uninstall --system -y vllm || true +# Install vllm-omni into the same uv-managed Python environment used by the base image. +# vLLM itself is provided by the base image, so only vllm-omni and its dev extras are installed here. +RUN uv pip install --system --no-cache-dir ".[dev]" -# Install vLLM from precompiled wheel at the selected commit. -# Must use direct URL because the wheel has a PEP 440 local version identifier -# (e.g. +g0a0a1a198) which pip/uv refuse to install from a PEP 503 package index. -ENV VLLM_PRECOMPILED_WHEEL_COMMIT=89138b21cc246ae944c741d5c399c148e2b770ab -RUN VLLM_WHEEL_URL=$(python3 -c "import urllib.request,re; \ - html=urllib.request.urlopen('https://wheels.vllm.ai/${VLLM_PRECOMPILED_WHEEL_COMMIT}/vllm/').read().decode(); \ - m=re.search(r'>(\S+x86_64\.whl)<',html); \ - print('https://wheels.vllm.ai/${VLLM_PRECOMPILED_WHEEL_COMMIT}/'+m.group(1).replace('+','%2B'))") && \ - echo "Installing vLLM from: ${VLLM_WHEEL_URL}" && \ - uv pip install --system --force-reinstall "${VLLM_WHEEL_URL}" - -RUN uv pip install --system ".[dev]" - -RUN uv pip install --system --upgrade \ - "flashinfer-cubin==0.6.6" \ - "nvidia-cublas-cu12==12.9.1.4" \ - "numpy==2.2.6" - -RUN uv pip install --system --upgrade \ - "flashinfer-jit-cache==0.6.6" \ - --index-url https://flashinfer.ai/whl/cu129 RUN ln -sf /usr/bin/python3 /usr/bin/python ENTRYPOINT []