vllm-project · Isotr0py · Feb 5, 2026 · Jan 29, 2026 · Jan 29, 2026 · Jan 29, 2026
@@ -7,12 +7,8 @@ WORKDIR ${APP_DIR}
 
 COPY . .
 
-# Remove this replace when the dispatch of requirements is ready
-RUN sed -i -E 's/^([[:space:]]*)"fa3-fwd==0\.0\.1",/\1# "fa3-fwd==0.0.1",/' pyproject.toml \
- && sed -i -E 's/\bonnxruntime\b/onnxruntime-cann/g' pyproject.toml
-
 # Install vllm-omni with dev dependencies
-RUN pip install --no-cache-dir -e .
+RUN pip install --no-cache-dir -e . --no-build-isolation
 
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
 

@@ -7,12 +7,8 @@ WORKDIR ${APP_DIR}
 
 COPY . .
 
-# Remove this replace when the dispatch of requirements is ready
-RUN sed -i -E 's/^([[:space:]]*)"fa3-fwd==0\.0\.1",/\1# "fa3-fwd==0.0.1",/' pyproject.toml \
- && sed -i -E 's/\bonnxruntime\b/onnxruntime-cann/g' pyproject.toml
-
 # Install vllm-omni with dev dependencies
-RUN pip install --no-cache-dir -e .
+RUN pip install --no-cache-dir -e . --no-build-isolation
 
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
 

@@ -15,11 +15,7 @@ RUN mkdir -p ${COMMON_WORKDIR}/vllm-omni
 
 # Step 2: Copy vllm-omni code and install it with `uv pip` into the interpreter that python3 resolves to
 COPY . ${COMMON_WORKDIR}/vllm-omni
-RUN cd ${COMMON_WORKDIR}/vllm-omni && uv pip install --python "$(python3 -c 'import sys; print(sys.executable)')" --no-cache-dir ".[dev]"
-
-# When we are installing onnxruntime-rocm, we need to uninstall the system-installed onnxruntime first.
-# These are the dependencies of Qwen3-TTS.
-RUN uv pip uninstall onnxruntime --system && uv pip install --no-cache-dir onnxruntime-rocm sox --system
+RUN cd ${COMMON_WORKDIR}/vllm-omni && uv pip install --python "$(python3 -c 'import sys; print(sys.executable)')" --no-cache-dir ".[dev]" --no-build-isolation
 
 RUN ln -sf /usr/bin/python3 /usr/bin/python
 

@@ -26,6 +26,8 @@ vLLM-Omni is a Python library that supports the following GPU variants. The libr
 
 ### Pre-built wheels
 
+Note: Pre-built wheels are currently only available for vLLM-Omni 0.11.0rc1, 0.12.0rc1, 0.14.0rc1, and 0.14.0. For the latest version, please [build from source](https://docs.vllm.ai/projects/vllm-omni/en/latest/getting_started/installation/gpu/#build-wheel-from-source).
+
 === "NVIDIA CUDA"
 
     --8<-- "docs/getting_started/installation/gpu/cuda.inc.md:pre-built-wheels"

@@ -17,8 +17,6 @@ Therefore, it is recommended to install vLLM and vLLM-Omni with a **fresh new**
 # --8<-- [start:pre-built-wheels]
 
 #### Installation of vLLM
-Note: Pre-built wheels are currently only available for vLLM-Omni 0.11.0rc1, 0.12.0rc1, 0.14.0rc1, 0.14.0. For the latest version, please [build from source](https://docs.vllm.ai/projects/vllm-omni/en/latest/getting_started/installation/gpu/#build-wheel-from-source).
-
 
 vLLM-Omni is built based on vLLM. Please install it with command below.
 ```bash

@@ -9,10 +9,60 @@ vLLM-Omni current recommends the steps in under setup through Docker Images.
 
 # --8<-- [start:pre-built-wheels]
 
+#### Installation of vLLM
+
+vLLM-Omni is built on top of vLLM. Install vLLM with the command below:
+```bash
+uv pip install vllm==0.14.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.14.0/rocm700
+```
+
+#### Installation of vLLM-Omni
+
+```bash
+# --no-build-isolation is needed because torch is not
+# obtained from PyPI: the build has to use the torch
+# that is already installed in our environment
+uv pip install vllm-omni --no-build-isolation
+
+# Optional: only needed if you want to run Qwen3-TTS
+uv pip uninstall onnxruntime # must be removed before onnxruntime-rocm can be installed
+uv pip install onnxruntime-rocm sox
+```
+
 # --8<-- [end:pre-built-wheels]
 
 # --8<-- [start:build-wheel-from-source]
 
+#### Installation of vLLM
+If you do not need to modify the source code of vLLM, you can directly install the stable 0.14.0 release of the library:
+
+```bash
+uv pip install vllm==0.14.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.14.0/rocm700
+```
+
+The 0.14.0 release of vLLM requires a ROCm 7.0 environment.
+
+#### Installation of vLLM-Omni
+Since vllm-omni is rapidly evolving, it's recommended to install it from source:
+```bash
+git clone https://github.com/vllm-project/vllm-omni.git
+cd vllm-omni
+VLLM_OMNI_TARGET_DEVICE=rocm uv pip install -e .
+# OR
+uv pip install -e . --no-build-isolation
+```
+
+<details><summary>(Optional) Installation of vLLM from source</summary>
+If you want to check, modify or debug with source code of vLLM, install the library from source with the following instructions:
+
+```bash
+git clone https://github.com/vllm-project/vllm.git
+cd vllm
+git checkout v0.14.0
+python3 -m pip install -r requirements/rocm.txt
+python3 setup.py develop
+```
+</details>
+
 # --8<-- [end:build-wheel-from-source]
 
 # --8<-- [start:build-docker]

@@ -38,12 +38,9 @@ docker run --rm \
 cd /vllm-workspace
 git clone -b v0.14.0 https://github.com/vllm-project/vllm-omni.git
 
-# Remove this replace when the dispatch of requirements is ready
-RUN sed -i -E 's/^([[:space:]]*)"fa3-fwd==0\.0\.1",/\1# "fa3-fwd==0.0.1",/' pyproject.toml \
- && sed -i -E 's/\bonnxruntime\b/onnxruntime-cann/g' pyproject.toml
-
 cd vllm-omni
-pip install -v -e .
+VLLM_OMNI_TARGET_DEVICE=npu pip install -v -e .
+# OR pip install -v -e . --no-build-isolation
 export VLLM_WORKER_MULTIPROC_METHOD=spawn
 ```
 

@@ -1,5 +1,9 @@
 [build-system]
-requires = ["setuptools>=61.0", "wheel"]
+requires = [
+    "setuptools>=77.0.3,<81.0.0",
+    "wheel",
+    "setuptools-scm>=8.0",
+]
 build-backend = "setuptools.build_meta"
 
 [project]
@@ -8,16 +12,16 @@ version = "0.14.0"
 description = "A framework for efficient model inference with omni-modality models"
 readme = "README.md"
 requires-python = ">=3.10,<3.14"
-license = {text = "Apache-2.0"}
+license = "Apache-2.0"
 authors = [
     {name = "vLLM-Omni Team"}
 ]
 keywords = ["vllm", "multimodal", "diffusion", "transformer", "inference", "serving"]
+dynamic = ["dependencies"]
 classifiers = [
     "Development Status :: 3 - Alpha",
     "Intended Audience :: Developers",
     "Intended Audience :: Science/Research",
-    "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
@@ -26,26 +30,10 @@ classifiers = [
     "Topic :: Software Development :: Libraries :: Python Modules",
 ]
 
-
-dependencies = [
-    # Core runtime dependencies (required for actual usage)
-    "omegaconf>=2.3.0",
-    "librosa>=0.11.0",
-    "resampy>=0.4.3",
-    "diffusers>=0.36.0",
-    "accelerate==1.12.0",
-    "gradio==5.50",
-    "soundfile>=0.13.1",
-    "cache-dit==1.2.0",
-    "tqdm>=4.66.0",
-    "torchsde>=0.2.6",  # Required for Stable Audio scheduler
-    "fa3-fwd==0.0.1", # flash attention 3, maintained by @ZJY0516
-    "openai-whisper>=20250625",
-    "imageio[ffmpeg]>=2.37.2",
-    "onnxruntime>=1.19.0",
-    "sox>=1.5.0",
-    # "vllm==0.15.0",  # TODO: fix the entrypoints overwrite problem
-]
+# Runtime dependencies are dynamic (see `dynamic` above): setup.py resolves them from the detected hardware platform.
+# This installs the correct platform-specific dependencies (CUDA/ROCm/CPU/XPU/NPU) automatically,
+# without requiring extras like [cuda]. See the requirements/ directory for the per-platform lists.
+# Note: vllm is intentionally excluded due to the entrypoints overwrite issue.
 
 [project.optional-dependencies]
 
@@ -200,4 +188,5 @@ extend-ignore-identifiers-re = [
     ".*NOTHINK.*",
     ".*nin.*",
     "Ono_Anna",
+    ".*cann.*",
 ]
@@ -0,0 +1,14 @@
+# Common dependencies for all platforms
+omegaconf>=2.3.0
+librosa>=0.11.0
+resampy>=0.4.3
+diffusers>=0.36.0
+accelerate==1.12.0
+gradio==5.50
+soundfile>=0.13.1
+cache-dit==1.2.0
+tqdm>=4.66.0
+torchsde>=0.2.6
+openai-whisper>=20250625
+imageio[ffmpeg]>=2.37.2
+sox>=1.5.0
@@ -0,0 +1,2 @@
+-r common.txt
+onnxruntime>=1.23.2
@@ -0,0 +1,3 @@
+-r common.txt
+onnxruntime>=1.23.2
+fa3-fwd==0.0.1
@@ -0,0 +1,3 @@
+-r common.txt
+onnxruntime-cann>=1.23.2
+torchaudio==2.9.0
@@ -0,0 +1,2 @@
+-r common.txt
+onnxruntime-rocm>=1.22.2
@@ -0,0 +1,2 @@
+-r common.txt
+onnxruntime>=1.23.2
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		-r common.txt
		onnxruntime>=1.23.2
Comment thread tjtanaa marked this conversation as resolved.