diff --git a/docker/Dockerfile.npu b/docker/Dockerfile.npu
index ab58ebec0df..c555372d9a0 100644
--- a/docker/Dockerfile.npu
+++ b/docker/Dockerfile.npu
@@ -7,12 +7,8 @@ WORKDIR ${APP_DIR}
 
 COPY . .
 
-# Remove this replace when the dispatch of requirements is ready
-RUN sed -i -E 's/^([[:space:]]*)"fa3-fwd==0\.0\.1",/\1# "fa3-fwd==0.0.1",/' pyproject.toml \
- && sed -i -E 's/\bonnxruntime\b/onnxruntime-cann/g' pyproject.toml
-
 # Install vllm-omni with dev dependencies
-RUN pip install --no-cache-dir -e .
+RUN VLLM_OMNI_TARGET_DEVICE=npu pip install --no-cache-dir -e . --no-build-isolation
 
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
 
diff --git a/docker/Dockerfile.npu.a3 b/docker/Dockerfile.npu.a3
index 17515fdb986..413ed88c31e 100644
--- a/docker/Dockerfile.npu.a3
+++ b/docker/Dockerfile.npu.a3
@@ -7,12 +7,8 @@ WORKDIR ${APP_DIR}
 
 COPY . .
 
-# Remove this replace when the dispatch of requirements is ready
-RUN sed -i -E 's/^([[:space:]]*)"fa3-fwd==0\.0\.1",/\1# "fa3-fwd==0.0.1",/' pyproject.toml \
- && sed -i -E 's/\bonnxruntime\b/onnxruntime-cann/g' pyproject.toml
-
 # Install vllm-omni with dev dependencies
-RUN pip install --no-cache-dir -e .
+RUN VLLM_OMNI_TARGET_DEVICE=npu pip install --no-cache-dir -e . --no-build-isolation
 
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
 
diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm
index bbb75a19617..d0af8342374 100644
--- a/docker/Dockerfile.rocm
+++ b/docker/Dockerfile.rocm
@@ -15,11 +15,7 @@ RUN mkdir -p ${COMMON_WORKDIR}/vllm-omni
 
 # Step 2: Copy vllm-omni code and install without uv
 COPY . ${COMMON_WORKDIR}/vllm-omni
-RUN cd ${COMMON_WORKDIR}/vllm-omni && uv pip install --python "$(python3 -c 'import sys; print(sys.executable)')" --no-cache-dir ".[dev]"
-
-# When we are installing onnxruntime-rocm, we need to uninstall the system-installed onnxruntime first.
-# These are the dependencies of Qwen3-TTS.
-RUN uv pip uninstall onnxruntime --system && uv pip install --no-cache-dir onnxruntime-rocm sox --system
+RUN cd ${COMMON_WORKDIR}/vllm-omni && uv pip install --python "$(python3 -c 'import sys; print(sys.executable)')" --no-cache-dir ".[dev]" --no-build-isolation
 
 RUN ln -sf /usr/bin/python3 /usr/bin/python
 
diff --git a/docs/getting_started/installation/gpu.md b/docs/getting_started/installation/gpu.md
index 73c974280ae..141256dbc7f 100644
--- a/docs/getting_started/installation/gpu.md
+++ b/docs/getting_started/installation/gpu.md
@@ -26,6 +26,8 @@ vLLM-Omni is a Python library that supports the following GPU variants. The libr
 
 ### Pre-built wheels
 
+Note: Pre-built wheels are currently only available for vLLM-Omni 0.11.0rc1, 0.12.0rc1, 0.14.0rc1, 0.14.0. For the latest version, please [build from source](https://docs.vllm.ai/projects/vllm-omni/en/latest/getting_started/installation/gpu/#build-wheel-from-source).
+
 === "NVIDIA CUDA"
 
     --8<-- "docs/getting_started/installation/gpu/cuda.inc.md:pre-built-wheels"
diff --git a/docs/getting_started/installation/gpu/cuda.inc.md b/docs/getting_started/installation/gpu/cuda.inc.md
index dfbfa1bf17c..09323cd2588 100644
--- a/docs/getting_started/installation/gpu/cuda.inc.md
+++ b/docs/getting_started/installation/gpu/cuda.inc.md
@@ -17,8 +17,6 @@ Therefore, it is recommended to install vLLM and vLLM-Omni with a **fresh new**
 # --8<-- [start:pre-built-wheels]
 
 #### Installation of vLLM
-Note: Pre-built wheels are currently only available for vLLM-Omni 0.11.0rc1, 0.12.0rc1, 0.14.0rc1, 0.14.0. For the latest version, please [build from source](https://docs.vllm.ai/projects/vllm-omni/en/latest/getting_started/installation/gpu/#build-wheel-from-source).
-
 
 vLLM-Omni is built based on vLLM. Please install it with command below.
 ```bash
diff --git a/docs/getting_started/installation/gpu/rocm.inc.md b/docs/getting_started/installation/gpu/rocm.inc.md
index 1a8ffb61284..f3b893f8e97 100644
--- a/docs/getting_started/installation/gpu/rocm.inc.md
+++ b/docs/getting_started/installation/gpu/rocm.inc.md
@@ -9,10 +9,60 @@ vLLM-Omni current recommends the steps in under setup through Docker Images.
 
 # --8<-- [start:pre-built-wheels]
 
+#### Installation of vLLM
+
+vLLM-Omni is built based on vLLM. Please install it with command below.
+```bash
+uv pip install vllm==0.14.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.14.0/rocm700
+```
+
+#### Installation of vLLM-Omni
+
+```bash
+# --no-build-isolation is required because torch is not obtained from PyPI:
+# the build has to use the torch that is already installed in this
+# environment to pick the ROCm dependencies.
+uv pip install vllm-omni --no-build-isolation
+
+# Optional: only needed if you want to run Qwen3-TTS
+uv pip uninstall onnxruntime  # must be removed before onnxruntime-rocm can be installed
+uv pip install onnxruntime-rocm sox
+```
+
 # --8<-- [end:pre-built-wheels]
 
 # --8<-- [start:build-wheel-from-source]
 
+#### Installation of vLLM
+If you do not need to modify the source code of vLLM, you can directly install the stable 0.14.0 release of the library:
+
+```bash
+uv pip install vllm==0.14.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.14.0/rocm700
+```
+
+The 0.14.0 release of vLLM requires a ROCm 7.0 environment.
+
+#### Installation of vLLM-Omni
+Since vllm-omni is rapidly evolving, it is recommended to install it from source:
+```bash
+git clone https://github.com/vllm-project/vllm-omni.git
+cd vllm-omni
+VLLM_OMNI_TARGET_DEVICE=rocm uv pip install -e .
+# OR
+uv pip install -e . --no-build-isolation
+```
+
+<details><summary>(Optional) Installation of vLLM from source</summary>
+If you want to check, modify or debug with source code of vLLM, install the library from source with the following instructions:
+
+```bash
+git clone https://github.com/vllm-project/vllm.git
+cd vllm
+git checkout v0.14.0
+python3 -m pip install -r requirements/rocm.txt
+python3 setup.py develop
+```
+</details>
 # --8<-- [end:build-wheel-from-source]
 
 # --8<-- [start:build-docker]
diff --git a/docs/getting_started/installation/npu/npu.inc.md b/docs/getting_started/installation/npu/npu.inc.md
index ff71f40091c..c473820fe8d 100644
--- a/docs/getting_started/installation/npu/npu.inc.md
+++ b/docs/getting_started/installation/npu/npu.inc.md
@@ -38,12 +38,9 @@ docker run --rm \
 cd /vllm-workspace
 git clone -b v0.14.0 https://github.com/vllm-project/vllm-omni.git
 
-# Remove this replace when the dispatch of requirements is ready
-RUN sed -i -E 's/^([[:space:]]*)"fa3-fwd==0\.0\.1",/\1# "fa3-fwd==0.0.1",/' pyproject.toml \
- && sed -i -E 's/\bonnxruntime\b/onnxruntime-cann/g' pyproject.toml
-
 cd vllm-omni
-pip install -v -e .
+VLLM_OMNI_TARGET_DEVICE=npu pip install -v -e .
+# OR pip install -v -e . --no-build-isolation
 export VLLM_WORKER_MULTIPROC_METHOD=spawn
 ```
 
diff --git a/pyproject.toml b/pyproject.toml
index af13606c50d..706d0152e1c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,9 @@
 [build-system]
-requires = ["setuptools>=61.0", "wheel"]
+requires = [
+    "setuptools>=77.0.3,<81.0.0",
+    "wheel",
+    "setuptools-scm>=8.0",
+]
 build-backend = "setuptools.build_meta"
 
 [project]
@@ -8,16 +12,16 @@ version = "0.14.0"
 description = "A framework for efficient model inference with omni-modality models"
 readme = "README.md"
 requires-python = ">=3.10,<3.14"
-license = {text = "Apache-2.0"}
+license = "Apache-2.0"
 authors = [
     {name = "vLLM-Omni Team"}
 ]
 keywords = ["vllm", "multimodal", "diffusion", "transformer", "inference", "serving"]
+dynamic = ["dependencies"]
 classifiers = [
     "Development Status :: 3 - Alpha",
     "Intended Audience :: Developers",
     "Intended Audience :: Science/Research",
-    "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
@@ -26,26 +30,10 @@ classifiers = [
     "Topic :: Software Development :: Libraries :: Python Modules",
 ]
 
-
-dependencies = [
-    # Core runtime dependencies (required for actual usage)
-    "omegaconf>=2.3.0",
-    "librosa>=0.11.0",
-    "resampy>=0.4.3",
-    "diffusers>=0.36.0",
-    "accelerate==1.12.0",
-    "gradio==5.50",
-    "soundfile>=0.13.1",
-    "cache-dit==1.2.0",
-    "tqdm>=4.66.0",
-    "torchsde>=0.2.6",  # Required for Stable Audio scheduler
-    "fa3-fwd==0.0.1", # flash attention 3, maintained by @ZJY0516
-    "openai-whisper>=20250625",
-    "imageio[ffmpeg]>=2.37.2",
-    "onnxruntime>=1.19.0",
-    "sox>=1.5.0",
-    # "vllm==0.15.0",  # TODO: fix the entrypoints overwrite problem
-]
+# Dependencies are now managed dynamically via setup.py based on detected hardware platform.
+# This allows automatic installation of the correct platform-specific dependencies (CUDA/ROCm/CPU/XPU/NPU)
+# without requiring extras like [cuda]. See requirements/ directory for platform-specific dependencies.
+# Note: vllm is intentionally excluded due to entrypoints overwrite issue.
 
 [project.optional-dependencies]
 
@@ -200,4 +188,5 @@ extend-ignore-identifiers-re = [
     ".*NOTHINK.*",
     ".*nin.*",
     "Ono_Anna",
+    ".*cann.*",
 ]
diff --git a/requirements/common.txt b/requirements/common.txt
new file mode 100644
index 00000000000..19ebca94939
--- /dev/null
+++ b/requirements/common.txt
@@ -0,0 +1,14 @@
+# Common dependencies for all platforms
+omegaconf>=2.3.0
+librosa>=0.11.0
+resampy>=0.4.3
+diffusers>=0.36.0
+accelerate==1.12.0
+gradio==5.50
+soundfile>=0.13.1
+cache-dit==1.2.0
+tqdm>=4.66.0
+torchsde>=0.2.6
+openai-whisper>=20250625
+imageio[ffmpeg]>=2.37.2
+sox>=1.5.0
diff --git a/requirements/cpu.txt b/requirements/cpu.txt
new file mode 100644
index 00000000000..c7732b1e13f
--- /dev/null
+++ b/requirements/cpu.txt
@@ -0,0 +1,2 @@
+-r common.txt
+onnxruntime>=1.23.2
diff --git a/requirements/cuda.txt b/requirements/cuda.txt
new file mode 100644
index 00000000000..823f472350a
--- /dev/null
+++ b/requirements/cuda.txt
@@ -0,0 +1,3 @@
+-r common.txt
+onnxruntime>=1.23.2
+fa3-fwd==0.0.1
diff --git a/requirements/npu.txt b/requirements/npu.txt
new file mode 100644
index 00000000000..9557427573c
--- /dev/null
+++ b/requirements/npu.txt
@@ -0,0 +1,3 @@
+-r common.txt
+onnxruntime-cann>=1.23.2
+torchaudio==2.9.0
diff --git a/requirements/rocm.txt b/requirements/rocm.txt
new file mode 100644
index 00000000000..849610eec82
--- /dev/null
+++ b/requirements/rocm.txt
@@ -0,0 +1,2 @@
+-r common.txt
+onnxruntime-rocm>=1.22.2
diff --git a/requirements/xpu.txt b/requirements/xpu.txt
new file mode 100644
index 00000000000..c7732b1e13f
--- /dev/null
+++ b/requirements/xpu.txt
@@ -0,0 +1,2 @@
+-r common.txt
+onnxruntime>=1.23.2
diff --git a/setup.py b/setup.py
new file mode 100644
index 00000000000..e33c848a3f9
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,171 @@
+"""
+Setup script for vLLM-Omni with hardware-dependent installation.
+
+This setup.py implements platform-aware dependency routing so `pip install` picks
+the platform-specific dependencies (CUDA/ROCm/CPU/XPU/NPU) without extras like
+`[cuda]`; set VLLM_OMNI_TARGET_DEVICE or use --no-build-isolation for detection.
+"""
+
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+from setuptools import setup
+
+
+def uninstall_onnxruntime() -> None:
+    """
+    Uninstall onnxruntime package if it exists.
+
+    This is necessary for ROCm environments where onnxruntime may conflict
+    with ROCm-specific dependencies.
+    """
+    try:
+        import pkg_resources
+
+        try:
+            pkg_resources.get_distribution("onnxruntime")
+            print("Found onnxruntime installed, uninstalling for ROCm compatibility...")
+            subprocess.check_call(
+                [sys.executable, "-m", "pip", "uninstall", "-y", "onnxruntime"],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+            )
+            print("Successfully uninstalled onnxruntime")
+        except pkg_resources.DistributionNotFound:
+            print("onnxruntime not installed, skipping uninstall")
+    except Exception as e:
+        print(f"Warning: Failed to uninstall onnxruntime: {e}")
+
+
+def detect_target_device() -> str:
+    """
+    Detect the target device for installation following RFC priority rules.
+
+    Priority order:
+    1. VLLM_OMNI_TARGET_DEVICE environment variable (highest priority)
+    2. Torch backend detection (cuda, rocm, npu, xpu)
+    3. CPU fallback (default)
+
+    Returns:
+        str: Device name ('cuda', 'rocm', 'npu', 'xpu', or 'cpu')
+    """
+    # Priority 1: Explicit override via environment variable
+    target_device = os.environ.get("VLLM_OMNI_TARGET_DEVICE")
+    if target_device:
+        valid_devices = ["cuda", "rocm", "npu", "xpu", "cpu"]
+        if target_device.lower() in valid_devices:
+            print(f"Using target device from VLLM_OMNI_TARGET_DEVICE: {target_device.lower()}")
+            return target_device.lower()
+        else:
+            print(f"Warning: Invalid VLLM_OMNI_TARGET_DEVICE '{target_device}', falling back to auto-detection")
+
+    # Priority 2: Torch backend detection
+    # This is a code path for when user is using
+    # --no-build-isolation flag
+    try:
+        import torch
+
+        # Check for CUDA
+        if torch.version.cuda is not None:
+            print("Detected CUDA backend from torch")
+            return "cuda"
+
+        # Check for ROCm (AMD)
+        if torch.version.hip is not None:
+            print("Detected ROCm backend from torch")
+            uninstall_onnxruntime()
+            return "rocm"
+
+        # Check for NPU (Ascend)
+        if hasattr(torch, "npu"):
+            try:
+                if torch.npu.is_available():
+                    print("Detected NPU backend from torch")
+                    return "npu"
+            except Exception:
+                pass
+
+        # Check for XPU (Intel)
+        if hasattr(torch, "xpu"):
+            try:
+                if torch.xpu.is_available():
+                    print("Detected XPU backend from torch")
+                    return "xpu"
+            except Exception:
+                pass
+
+        print("No GPU backend detected in torch, defaulting to CPU")
+        return "cpu"
+
+    except ImportError:
+        print("PyTorch not found, defaulting to CUDA installation")
+        return "cuda"
+
+
+def load_requirements(file_path: Path) -> list[str]:
+    """
+    Load requirements from a file, supporting -r directive for recursive loading.
+
+    Args:
+        file_path: Path to the requirements file
+
+    Returns:
+        List of requirement strings
+    """
+    requirements = []
+
+    if not file_path.exists():
+        print(f"Warning: Requirements file not found: {file_path}")
+        return requirements
+
+    with open(file_path) as f:
+        for line in f:
+            line = line.strip()
+
+            # Skip empty lines and comments
+            if not line or line.startswith("#"):
+                continue
+
+            # Handle -r directive for recursive loading
+            if line.startswith("-r "):
+                nested_file = line[3:].strip()
+                nested_path = file_path.parent / nested_file
+                requirements.extend(load_requirements(nested_path))
+            else:
+                requirements.append(line)
+
+    return requirements
+
+
+def get_install_requires() -> list[str]:
+    """
+    Get the list of dependencies based on detected platform.
+
+    Returns:
+        List of requirement strings for the detected platform
+    """
+    device = detect_target_device()
+    requirements_dir = Path(__file__).parent / "requirements"
+    requirements_file = requirements_dir / f"{device}.txt"
+
+    print(f"Loading requirements from: {requirements_file}")
+    requirements = load_requirements(requirements_file)
+
+    if not requirements:
+        print(f"Warning: No requirements loaded for device '{device}'")
+    else:
+        print(f"Loaded {len(requirements)} requirements for {device}")
+
+    return requirements
+
+
+if __name__ == "__main__":
+    # Get platform-specific dependencies
+    install_requires = get_install_requires()
+
+    # Setup configuration
+    setup(
+        install_requires=install_requires,
+    )