diff --git a/docker/Dockerfile.npu b/docker/Dockerfile.npu index ab58ebec0df..c555372d9a0 100644 --- a/docker/Dockerfile.npu +++ b/docker/Dockerfile.npu @@ -7,12 +7,8 @@ WORKDIR ${APP_DIR} COPY . . -# Remove this replace when the dispatch of requirements is ready -RUN sed -i -E 's/^([[:space:]]*)"fa3-fwd==0\.0\.1",/\1# "fa3-fwd==0.0.1",/' pyproject.toml \ - && sed -i -E 's/\bonnxruntime\b/onnxruntime-cann/g' pyproject.toml - # Install vllm-omni with dev dependencies -RUN pip install --no-cache-dir -e . +RUN pip install --no-cache-dir -e . --no-build-isolation ENV VLLM_WORKER_MULTIPROC_METHOD=spawn diff --git a/docker/Dockerfile.npu.a3 b/docker/Dockerfile.npu.a3 index 17515fdb986..413ed88c31e 100644 --- a/docker/Dockerfile.npu.a3 +++ b/docker/Dockerfile.npu.a3 @@ -7,12 +7,8 @@ WORKDIR ${APP_DIR} COPY . . -# Remove this replace when the dispatch of requirements is ready -RUN sed -i -E 's/^([[:space:]]*)"fa3-fwd==0\.0\.1",/\1# "fa3-fwd==0.0.1",/' pyproject.toml \ - && sed -i -E 's/\bonnxruntime\b/onnxruntime-cann/g' pyproject.toml - # Install vllm-omni with dev dependencies -RUN pip install --no-cache-dir -e . +RUN pip install --no-cache-dir -e . --no-build-isolation ENV VLLM_WORKER_MULTIPROC_METHOD=spawn diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index bbb75a19617..d0af8342374 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -15,11 +15,7 @@ RUN mkdir -p ${COMMON_WORKDIR}/vllm-omni # Step 2: Copy vllm-omni code and install without uv COPY . ${COMMON_WORKDIR}/vllm-omni -RUN cd ${COMMON_WORKDIR}/vllm-omni && uv pip install --python "$(python3 -c 'import sys; print(sys.executable)')" --no-cache-dir ".[dev]" - -# When we are installing onnxruntime-rocm, we need to uninstall the system-installed onnxruntime first. -# These are the dependencies of Qwen3-TTS. 
-RUN uv pip uninstall onnxruntime --system && uv pip install --no-cache-dir onnxruntime-rocm sox --system +RUN cd ${COMMON_WORKDIR}/vllm-omni && uv pip install --python "$(python3 -c 'import sys; print(sys.executable)')" --no-cache-dir ".[dev]" --no-build-isolation RUN ln -sf /usr/bin/python3 /usr/bin/python diff --git a/docs/getting_started/installation/gpu.md b/docs/getting_started/installation/gpu.md index 73c974280ae..141256dbc7f 100644 --- a/docs/getting_started/installation/gpu.md +++ b/docs/getting_started/installation/gpu.md @@ -26,6 +26,8 @@ vLLM-Omni is a Python library that supports the following GPU variants. The libr ### Pre-built wheels +Note: Pre-built wheels are currently only available for vLLM-Omni 0.11.0rc1, 0.12.0rc1, 0.14.0rc1, 0.14.0. For the latest version, please [build from source](https://docs.vllm.ai/projects/vllm-omni/en/latest/getting_started/installation/gpu/#build-wheel-from-source). + === "NVIDIA CUDA" --8<-- "docs/getting_started/installation/gpu/cuda.inc.md:pre-built-wheels" diff --git a/docs/getting_started/installation/gpu/cuda.inc.md b/docs/getting_started/installation/gpu/cuda.inc.md index dfbfa1bf17c..09323cd2588 100644 --- a/docs/getting_started/installation/gpu/cuda.inc.md +++ b/docs/getting_started/installation/gpu/cuda.inc.md @@ -17,8 +17,6 @@ Therefore, it is recommended to install vLLM and vLLM-Omni with a **fresh new** # --8<-- [start:pre-built-wheels] #### Installation of vLLM -Note: Pre-built wheels are currently only available for vLLM-Omni 0.11.0rc1, 0.12.0rc1, 0.14.0rc1, 0.14.0. For the latest version, please [build from source](https://docs.vllm.ai/projects/vllm-omni/en/latest/getting_started/installation/gpu/#build-wheel-from-source). - vLLM-Omni is built based on vLLM. Please install it with command below. 
```bash diff --git a/docs/getting_started/installation/gpu/rocm.inc.md b/docs/getting_started/installation/gpu/rocm.inc.md index 1a8ffb61284..f3b893f8e97 100644 --- a/docs/getting_started/installation/gpu/rocm.inc.md +++ b/docs/getting_started/installation/gpu/rocm.inc.md @@ -9,10 +9,60 @@ vLLM-Omni current recommends the steps in under setup through Docker Images. # --8<-- [start:pre-built-wheels] +#### Installation of vLLM + +vLLM-Omni is built based on vLLM. Please install it with the command below. +```bash +uv pip install vllm==0.14.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.14.0/rocm700 +``` + +#### Installation of vLLM-Omni + +```bash +# NOTE: torch is not obtained from PyPI here. If the package has to be built +# from source, add --no-build-isolation so that the build uses the +# torch already installed in this environment +uv pip install vllm-omni + +# Optional: only needed if you want to run Qwen3 TTS +uv pip uninstall onnxruntime # must be removed before onnxruntime-rocm can be installed +uv pip install onnxruntime-rocm sox +``` + # --8<-- [end:pre-built-wheels] # --8<-- [start:build-wheel-from-source] +#### Installation of vLLM +If you do not need to modify the source code of vLLM, you can directly install the stable 0.14.0 release of the library. + +```bash +uv pip install vllm==0.14.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.14.0/rocm700 +``` + +Release 0.14.0 of vLLM requires a ROCm 7.0 environment. + +#### Installation of vLLM-Omni +Since vllm-omni is rapidly evolving, it is recommended to install it from source. +```bash +git clone https://github.com/vllm-project/vllm-omni.git +cd vllm-omni +VLLM_OMNI_TARGET_DEVICE=rocm uv pip install -e . +# OR +uv pip install -e . --no-build-isolation +``` + +
(Optional) Installation of vLLM from source +If you want to check, modify or debug with source code of vLLM, install the library from source with the following instructions: + +```bash +git clone https://github.com/vllm-project/vllm.git +cd vllm +git checkout v0.14.0 +python3 -m pip install -r requirements/rocm.txt +python3 setup.py develop +``` + # --8<-- [end:build-wheel-from-source] # --8<-- [start:build-docker] diff --git a/docs/getting_started/installation/npu/npu.inc.md b/docs/getting_started/installation/npu/npu.inc.md index ff71f40091c..c473820fe8d 100644 --- a/docs/getting_started/installation/npu/npu.inc.md +++ b/docs/getting_started/installation/npu/npu.inc.md @@ -38,12 +38,9 @@ docker run --rm \ cd /vllm-workspace git clone -b v0.14.0 https://github.com/vllm-project/vllm-omni.git -# Remove this replace when the dispatch of requirements is ready -RUN sed -i -E 's/^([[:space:]]*)"fa3-fwd==0\.0\.1",/\1# "fa3-fwd==0.0.1",/' pyproject.toml \ - && sed -i -E 's/\bonnxruntime\b/onnxruntime-cann/g' pyproject.toml - cd vllm-omni -pip install -v -e . +VLLM_OMNI_TARGET_DEVICE=npu pip install -v -e . +# OR pip install -v -e . 
--no-build-isolation export VLLM_WORKER_MULTIPROC_METHOD=spawn ``` diff --git a/pyproject.toml b/pyproject.toml index af13606c50d..706d0152e1c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,9 @@ [build-system] -requires = ["setuptools>=61.0", "wheel"] +requires = [ + "setuptools>=77.0.3,<81.0.0", + "wheel", + "setuptools-scm>=8.0", +] build-backend = "setuptools.build_meta" [project] @@ -8,16 +12,16 @@ version = "0.14.0" description = "A framework for efficient model inference with omni-modality models" readme = "README.md" requires-python = ">=3.10,<3.14" -license = {text = "Apache-2.0"} +license = "Apache-2.0" authors = [ {name = "vLLM-Omni Team"} ] keywords = ["vllm", "multimodal", "diffusion", "transformer", "inference", "serving"] +dynamic = ["dependencies"] classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "Intended Audience :: Science/Research", - "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -26,26 +30,10 @@ classifiers = [ "Topic :: Software Development :: Libraries :: Python Modules", ] - -dependencies = [ - # Core runtime dependencies (required for actual usage) - "omegaconf>=2.3.0", - "librosa>=0.11.0", - "resampy>=0.4.3", - "diffusers>=0.36.0", - "accelerate==1.12.0", - "gradio==5.50", - "soundfile>=0.13.1", - "cache-dit==1.2.0", - "tqdm>=4.66.0", - "torchsde>=0.2.6", # Required for Stable Audio scheduler - "fa3-fwd==0.0.1", # flash attention 3, maintained by @ZJY0516 - "openai-whisper>=20250625", - "imageio[ffmpeg]>=2.37.2", - "onnxruntime>=1.19.0", - "sox>=1.5.0", - # "vllm==0.15.0", # TODO: fix the entrypoints overwrite problem -] +# Dependencies are now managed dynamically via setup.py based on detected hardware platform. 
+# This allows automatic installation of the correct platform-specific dependencies (CUDA/ROCm/CPU/XPU/NPU) +# without requiring extras like [cuda]. See requirements/ directory for platform-specific dependencies. +# Note: vllm is intentionally excluded due to entrypoints overwrite issue. [project.optional-dependencies] @@ -200,4 +188,5 @@ extend-ignore-identifiers-re = [ ".*NOTHINK.*", ".*nin.*", "Ono_Anna", + ".*cann.*", ] diff --git a/requirements/common.txt b/requirements/common.txt new file mode 100644 index 00000000000..19ebca94939 --- /dev/null +++ b/requirements/common.txt @@ -0,0 +1,14 @@ +# Common dependencies for all platforms +omegaconf>=2.3.0 +librosa>=0.11.0 +resampy>=0.4.3 +diffusers>=0.36.0 +accelerate==1.12.0 +gradio==5.50 +soundfile>=0.13.1 +cache-dit==1.2.0 +tqdm>=4.66.0 +torchsde>=0.2.6 +openai-whisper>=20250625 +imageio[ffmpeg]>=2.37.2 +sox>=1.5.0 diff --git a/requirements/cpu.txt b/requirements/cpu.txt new file mode 100644 index 00000000000..c7732b1e13f --- /dev/null +++ b/requirements/cpu.txt @@ -0,0 +1,2 @@ +-r common.txt +onnxruntime>=1.23.2 diff --git a/requirements/cuda.txt b/requirements/cuda.txt new file mode 100644 index 00000000000..823f472350a --- /dev/null +++ b/requirements/cuda.txt @@ -0,0 +1,3 @@ +-r common.txt +onnxruntime>=1.23.2 +fa3-fwd==0.0.1 diff --git a/requirements/npu.txt b/requirements/npu.txt new file mode 100644 index 00000000000..9557427573c --- /dev/null +++ b/requirements/npu.txt @@ -0,0 +1,3 @@ +-r common.txt +onnxruntime-cann>=1.23.2 +torchaudio==2.9.0 diff --git a/requirements/rocm.txt b/requirements/rocm.txt new file mode 100644 index 00000000000..849610eec82 --- /dev/null +++ b/requirements/rocm.txt @@ -0,0 +1,2 @@ +-r common.txt +onnxruntime-rocm>=1.22.2 diff --git a/requirements/xpu.txt b/requirements/xpu.txt new file mode 100644 index 00000000000..c7732b1e13f --- /dev/null +++ b/requirements/xpu.txt @@ -0,0 +1,2 @@ +-r common.txt +onnxruntime>=1.23.2 diff --git a/setup.py b/setup.py new file mode 100644 
# index 00000000000..e33c848a3f9 --- /dev/null +++ b/setup.py @@ -0,0 +1,171 @@
"""
Setup script for vLLM-Omni with hardware-dependent installation.

This setup.py implements platform-aware dependency routing so users can run
`pip install vllm-omni` and automatically receive the correct platform-specific
dependencies (CUDA/ROCm/CPU/XPU/NPU) without requiring extras like `[cuda]`.
"""

import os
import re
import subprocess
import sys
from importlib import metadata
from pathlib import Path

# Device names that have a matching ``requirements/<device>.txt`` file.
VALID_TARGET_DEVICES = ("cuda", "rocm", "npu", "xpu", "cpu")

# Same comment rule as pip's requirements-file format: a ``#`` starts a comment
# only at the beginning of the line or when preceded by whitespace, so URL
# fragments such as ``...#egg=name`` are left untouched.
_COMMENT_RE = re.compile(r"(^|\s+)#.*$")

# ``-r other.txt`` / ``--requirement other.txt`` include directives.
_INCLUDE_RE = re.compile(r"^(?:-r|--requirement)\s+(?P<path>.+)$")


def uninstall_onnxruntime() -> None:
    """
    Uninstall the ``onnxruntime`` package if it is installed.

    This is necessary for ROCm environments where onnxruntime may conflict
    with ROCm-specific dependencies.

    The operation is best-effort: a failing ``pip uninstall`` is reported as a
    warning on stdout and never raises.
    """
    # importlib.metadata is the stdlib replacement for the deprecated
    # pkg_resources API and does not depend on setuptools being importable.
    try:
        metadata.distribution("onnxruntime")
    except metadata.PackageNotFoundError:
        print("onnxruntime not installed, skipping uninstall")
        return

    print("Found onnxruntime installed, uninstalling for ROCm compatibility...")
    try:
        subprocess.check_call(
            [sys.executable, "-m", "pip", "uninstall", "-y", "onnxruntime"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.SubprocessError, OSError) as e:
        print(f"Warning: Failed to uninstall onnxruntime: {e}")
    else:
        print("Successfully uninstalled onnxruntime")


def _backend_available(torch_module, backend: str) -> bool:
    """Return True if ``torch.<backend>`` exists and reports itself available."""
    backend_module = getattr(torch_module, backend, None)
    if backend_module is None:
        return False
    try:
        return bool(backend_module.is_available())
    except Exception as e:
        # Probing is best-effort: a broken backend must not abort the install,
        # but the failure is reported instead of being silently swallowed.
        print(f"Warning: {backend.upper()} availability check failed: {e}")
        return False


def detect_target_device() -> str:
    """
    Detect the target device for installation.

    Priority order:
    1. VLLM_OMNI_TARGET_DEVICE environment variable (highest priority,
       case-insensitive, surrounding whitespace ignored). An invalid value
       prints a warning and falls through to auto-detection.
    2. Torch backend detection (cuda, rocm, npu, xpu), falling back to
       'cpu' when torch is importable but exposes no accelerator backend.
    3. 'cuda' when torch cannot be imported at all (e.g. when the build runs
       in an isolated environment without torch).

    Side effect: when ROCm is detected through torch, an installed
    ``onnxruntime`` is uninstalled via ``uninstall_onnxruntime()``. This does
    not happen when 'rocm' is selected through the environment variable.

    Returns:
        str: Device name ('cuda', 'rocm', 'npu', 'xpu', or 'cpu')
    """
    # Priority 1: Explicit override via environment variable
    override = os.environ.get("VLLM_OMNI_TARGET_DEVICE")
    if override:
        normalized = override.strip().lower()
        if normalized in VALID_TARGET_DEVICES:
            print(f"Using target device from VLLM_OMNI_TARGET_DEVICE: {normalized}")
            return normalized
        print(f"Warning: Invalid VLLM_OMNI_TARGET_DEVICE '{override}', falling back to auto-detection")

    # Priority 2: Torch backend detection.
    # This is the code path taken when the user installs with the
    # --no-build-isolation flag, so the environment's torch is importable.
    try:
        import torch
    except ImportError:
        # Priority 3: torch is unavailable, assume the CUDA dependency set.
        print("PyTorch not found, defaulting to CUDA installation")
        return "cuda"

    # Check for CUDA
    if torch.version.cuda is not None:
        print("Detected CUDA backend from torch")
        return "cuda"

    # Check for ROCm (AMD)
    if torch.version.hip is not None:
        print("Detected ROCm backend from torch")
        uninstall_onnxruntime()
        return "rocm"

    # Check for NPU (Ascend)
    if _backend_available(torch, "npu"):
        print("Detected NPU backend from torch")
        return "npu"

    # Check for XPU (Intel)
    if _backend_available(torch, "xpu"):
        print("Detected XPU backend from torch")
        return "xpu"

    print("No GPU backend detected in torch, defaulting to CPU")
    return "cpu"


def _load_requirements(file_path: Path, seen: set) -> list[str]:
    """Recursive worker for ``load_requirements``; ``seen`` holds visited files."""
    resolved = file_path.resolve()
    if resolved in seen:
        # Guards against include cycles (a.txt -> b.txt -> a.txt) and avoids
        # emitting the same file's requirements twice.
        print(f"Warning: Skipping already included requirements file: {file_path}")
        return []
    seen.add(resolved)

    if not file_path.exists():
        print(f"Warning: Requirements file not found: {file_path}")
        return []

    requirements: list[str] = []
    with open(file_path, encoding="utf-8") as f:
        for raw_line in f:
            # Drop full-line and inline comments, then surrounding whitespace.
            line = _COMMENT_RE.sub("", raw_line).strip()
            if not line:
                continue

            include = _INCLUDE_RE.match(line)
            if include:
                # Nested paths are relative to the including file.
                nested_path = file_path.parent / include.group("path").strip()
                requirements.extend(_load_requirements(nested_path, seen))
            elif line.startswith("-"):
                # Other pip options (e.g. --extra-index-url) are not valid
                # install_requires entries and would break setup().
                print(f"Warning: Ignoring unsupported option in {file_path}: {line}")
            else:
                requirements.append(line)

    return requirements


def load_requirements(file_path: Path) -> list[str]:
    """
    Load requirements from a file, supporting -r directive for recursive loading.

    Blank lines, full-line comments and inline comments are removed.
    ``-r <file>`` / ``--requirement <file>`` directives are resolved relative
    to the including file; each file is read at most once, so cyclic includes
    terminate. Any other option line is skipped with a warning. A missing file
    yields an empty list and a warning.

    Args:
        file_path: Path to the requirements file

    Returns:
        List of requirement strings
    """
    return _load_requirements(Path(file_path), set())


def get_install_requires() -> list[str]:
    """
    Get the list of dependencies based on detected platform.

    Reads ``requirements/<device>.txt`` next to this file, where ``<device>``
    is the value returned by ``detect_target_device()``.

    Returns:
        List of requirement strings for the detected platform
    """
    device = detect_target_device()
    requirements_file = Path(__file__).parent / "requirements" / f"{device}.txt"

    print(f"Loading requirements from: {requirements_file}")
    requirements = load_requirements(requirements_file)

    if not requirements:
        print(f"Warning: No requirements loaded for device '{device}'")
    else:
        print(f"Loaded {len(requirements)} requirements for {device}")

    return requirements


if __name__ == "__main__":
    # Imported here so the helper functions above stay importable (e.g. by
    # tests) without pulling in setuptools.
    from setuptools import setup

    # Platform-specific dependencies are resolved at build time.
    setup(
        install_requires=get_install_requires(),
    )