vllm-project · Gaohan123 · Mar 21, 2026 · Mar 20, 2026 · Mar 20, 2026 · Mar 21, 2026
@@ -10,7 +10,7 @@ steps:
           DOCKER_BUILDKIT: "1"
           # Buildkite will automatically replace this with the actual commit hash
           VLLM_IMAGE_TAG: "${BUILDKITE_COMMIT}"
-          VLLM_VERSION: "v0.17.0"
+          VLLM_VERSION: "v0.18.0"
         priority: 100
         timeout_in_minutes: 60
         soft_fail: true
@@ -1,5 +1,5 @@
 ARG VLLM_BASE_IMAGE=vllm/vllm-openai
-ARG VLLM_BASE_TAG=v0.17.0
+ARG VLLM_BASE_TAG=v0.18.0
 FROM ${VLLM_BASE_IMAGE}:${VLLM_BASE_TAG}
 ARG APP_DIR=/workspace/vllm-omni
 WORKDIR ${APP_DIR}
@@ -11,29 +11,10 @@ RUN apt-get update && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-RUN uv pip uninstall --system -y vllm || true
+# Install vllm-omni into the same uv-managed Python environment used by the base image.
+# vLLM itself is provided by the base image (VLLM_BASE_TAG), so only vllm-omni and its dev extras are installed here.
+RUN uv pip install --system --no-cache-dir ".[dev]"
 
-# Install vLLM from precompiled wheel at the selected commit.
-# Must use direct URL because the wheel has a PEP 440 local version identifier
-# (e.g. +g0a0a1a198) which pip/uv refuse to install from a PEP 503 package index.
-ENV VLLM_PRECOMPILED_WHEEL_COMMIT=89138b21cc246ae944c741d5c399c148e2b770ab
-RUN VLLM_WHEEL_URL=$(python3 -c "import urllib.request,re; \
-    html=urllib.request.urlopen('https://wheels.vllm.ai/${VLLM_PRECOMPILED_WHEEL_COMMIT}/vllm/').read().decode(); \
-    m=re.search(r'>(\S+x86_64\.whl)<',html); \
-    print('https://wheels.vllm.ai/${VLLM_PRECOMPILED_WHEEL_COMMIT}/'+m.group(1).replace('+','%2B'))") && \
-    echo "Installing vLLM from: ${VLLM_WHEEL_URL}" && \
-    uv pip install --system --force-reinstall "${VLLM_WHEEL_URL}"
-
-RUN uv pip install --system ".[dev]"
-
-RUN uv pip install --system --upgrade \
-        "flashinfer-cubin==0.6.6" \
-        "nvidia-cublas-cu12==12.9.1.4" \
-        "numpy==2.2.6"
-
-RUN uv pip install --system --upgrade \
-    "flashinfer-jit-cache==0.6.6" \
-    --index-url https://flashinfer.ai/whl/cu129
 RUN ln -sf /usr/bin/python3 /usr/bin/python
 
 ENTRYPOINT []
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=vllm/vllm-openai-rocm:v0.17.0
+ARG BASE_IMAGE=vllm/vllm-openai-rocm:v0.18.0
 FROM ${BASE_IMAGE} AS base
 
 # Declare a variable to know if we want to use the nightly build or the stable build.
@@ -10,7 +10,7 @@ FROM ${BASE_IMAGE} AS base
 #    we should swap over to use stable release ASAP.
 #    We should avoid relying on custom commits.
 ARG USE_NIGHTLY_BUILD=0
-ARG VLLM_VERSION_OR_COMMIT_HASH=2d5be1dd5ce2e44dfea53ea03ff61143da5137eb
+ARG VLLM_VERSION_OR_COMMIT_HASH=89138b21cc246ae944c741d5c399c148e2b770ab
 ARG ARG_PYTORCH_ROCM_ARCH
 ENV PYTORCH_ROCM_ARCH=${ARG_PYTORCH_ROCM_ARCH:-${PYTORCH_ROCM_ARCH}}
 

@@ -76,7 +76,7 @@ ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
 ENV UV_INDEX_STRATEGY="unsafe-best-match"
 ENV UV_LINK_MODE="copy"
 
-ARG VLLM_VERSION=v0.17.0
+ARG VLLM_VERSION=v0.18.0
 RUN git clone -b ${VLLM_VERSION} https://github.com/vllm-project/vllm
 WORKDIR /workspace/vllm
 

@@ -30,7 +30,7 @@ vLLM-Omni is a Python library that supports the following GPU variants. The libr
 
 ### Pre-built wheels
 
-Note: Pre-built wheels are currently only available for vLLM-Omni 0.11.0rc1, 0.12.0rc1, 0.14.0rc1, 0.14.0, 0.16.0. For the latest version, please [build from source](https://docs.vllm.ai/projects/vllm-omni/en/latest/getting_started/installation/gpu/#build-wheel-from-source).
+Note: Pre-built wheels are currently available for vLLM-Omni 0.11.0rc1, 0.12.0rc1, 0.14.0rc1, 0.14.0, 0.16.0, and 0.18.0. If you need a newer unreleased revision, please [build from source](https://docs.vllm.ai/projects/vllm-omni/en/latest/getting_started/installation/gpu/#build-wheel-from-source).
 
 === "NVIDIA CUDA"
 

@@ -34,13 +34,13 @@ uv pip install vllm-omni
 # --8<-- [start:build-wheel-from-source]
 
 #### Installation of vLLM
-If you do not need to modify source code of vLLM, you can directly install the stable 0.17.0 release version of the library
+If you do not need to modify source code of vLLM, you can directly install the stable 0.18.0 release version of the library
 
 ```bash
-uv pip install vllm==0.17.0 --torch-backend=auto
+uv pip install vllm==0.18.0 --torch-backend=auto
 ```
 
-The release 0.17.0 of vLLM is based on PyTorch 2.10.0 which requires CUDA 12.9 environment.
+The 0.18.0 release of vLLM ships CUDA 12.9-compatible binaries by default. If you need a different CUDA variant or want to reuse an existing PyTorch installation, build vLLM from source instead.
 
 #### Installation of vLLM-Omni
 Since vllm-omni is rapidly evolving, it's recommended to install it from source
@@ -56,11 +56,11 @@ If you want to check, modify or debug with source code of vLLM, install the libr
 ```bash
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
-git checkout v0.17.0
+git checkout v0.18.0
 ```
 Set the environment variable below so the build uses a pre-built wheel. If the download fails because of network problems, download the `.whl` file manually and set `VLLM_PRECOMPILED_WHEEL_LOCATION` to the absolute local path of that file.
 ```bash
-export VLLM_PRECOMPILED_WHEEL_LOCATION=https://github.com/vllm-project/vllm/releases/download/v0.17.0/vllm-0.17.0-cp38-abi3-manylinux_2_31_x86_64.whl
+export VLLM_PRECOMPILED_WHEEL_LOCATION=https://github.com/vllm-project/vllm/releases/download/v0.18.0/vllm-0.18.0+cu129-cp38-abi3-manylinux_2_35_x86_64.whl
 ```
 Install vllm with the command below (if you have no existing PyTorch installation).
 ```bash
@@ -91,7 +91,7 @@ docker run --runtime nvidia --gpus 2 \
     --env "HF_TOKEN=$HF_TOKEN" \
     -p 8091:8091 \
     --ipc=host \
-    vllm/vllm-omni:v0.16.0 \
+    vllm/vllm-omni:v0.18.0 \
     --model Qwen/Qwen3-Omni-30B-A3B-Instruct --port 8091
 ```
 

@@ -13,7 +13,7 @@ vLLM-Omni current recommends the steps in under setup through Docker Images.
 
 vLLM-Omni is built on top of vLLM. Please install vLLM with the command below.
 ```bash
-uv pip install vllm==0.17.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.17.0/rocm700
+uv pip install vllm==0.18.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.18.0/rocm700
 ```
 
 #### Installation of vLLM-Omni
@@ -34,13 +34,13 @@ uv pip install onnxruntime-rocm sox
 # --8<-- [start:build-wheel-from-source]
 
 #### Installation of vLLM
-If you do not need to modify source code of vLLM, you can directly install the stable 0.17.0 release version of the library
+If you do not need to modify source code of vLLM, you can directly install the stable 0.18.0 release version of the library
 
 ```bash
-uv pip install vllm==0.17.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.17.0/rocm700
+uv pip install vllm==0.18.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.18.0/rocm700
 ```
 
-The release 0.17.0 of vLLM requires ROCm 7.0 environment.
+The pre-built 0.18.0 vLLM wheel targets ROCm 7.0. If you need a different ROCm stack or want to reuse an existing PyTorch installation, build vLLM from source instead.
 
 #### Installation of vLLM-Omni
 Since vllm-omni is rapidly evolving, it's recommended to install it from source
@@ -58,7 +58,7 @@ If you want to check, modify or debug with source code of vLLM, install the libr
 ```bash
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
-git checkout v0.17.0
+git checkout v0.18.0
 python3 -m pip install -r requirements/rocm.txt
 python3 setup.py develop
 ```
@@ -130,7 +130,7 @@ docker run --rm \
   -v ~/.cache/huggingface:/root/.cache/huggingface \
   --env "HF_TOKEN=$HF_TOKEN" \
   -p 8091:8091 \
-  vllm/vllm-omni-rocm:v0.16.0 \
+  vllm/vllm-omni-rocm:v0.18.0 \
   --model Qwen/Qwen3-Omni-30B-A3B-Instruct --omni --port 8091
 ```
 
@@ -149,7 +149,7 @@ docker run --rm -it \
   -v ~/.cache/huggingface:/root/.cache/huggingface \
   --env "HF_TOKEN=$HF_TOKEN" \
   --entrypoint bash \
-  vllm/vllm-omni-rocm:v0.16.0
+  vllm/vllm-omni-rocm:v0.18.0
 ```
 
 # --8<-- [end:pre-built-images]
@@ -19,10 +19,10 @@ uv venv --python 3.12 --seed
 source .venv/bin/activate
 
 # On CUDA
-uv pip install vllm==0.17.0 --torch-backend=auto
+uv pip install vllm==0.18.0 --torch-backend=auto
 
 # On ROCm
-uv pip install vllm==0.17.0 --extra-index-url https://wheels.vllm.ai/rocm/0.17.0/rocm700
+uv pip install vllm==0.18.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.18.0/rocm700
 
 git clone https://github.com/vllm-project/vllm-omni.git
 cd vllm-omni