vllm-project · inaniloquentee · Apr 5, 2026 · Apr 5, 2026 · Apr 5, 2026 · Apr 5, 2026
@@ -44,11 +44,19 @@ steps:
     agents:
       queue: "cpu_queue_premerge"
 
-  # L4 Test — main+NIGHTLY=1 (scheduled), or PR with label nightly-test (e.g. add label then Rebuild)
+  # L4 Test — main+NIGHTLY=1 (scheduled), or PR carrying one of the test labels listed in the condition below (e.g. add label then Rebuild)
   - label: "Upload Nightly Pipeline"
     depends_on: image-build
     key: upload-nightly-pipeline
-    if: '(build.branch == "main" && build.env("NIGHTLY") == "1") || (build.branch != "main" && build.pull_request.labels includes "nightly-test")'
+    if: >-
+      (build.branch == "main" && build.env("NIGHTLY") == "1") ||
+      (build.branch != "main" && (
+        build.pull_request.labels includes "nightly-test" ||
+        build.pull_request.labels includes "omni-test" ||
+        build.pull_request.labels includes "tts-test" ||
+        build.pull_request.labels includes "diffusion-x2iat-test" ||
+        build.pull_request.labels includes "diffusion-x2v-test"
+      ))
     commands:
       - buildkite-agent pipeline upload .buildkite/test-nightly.yml
     agents:

@@ -86,7 +86,7 @@ Through five levels (L1-L5) and common (Common) specifications, the system clari
         /tests/e2e/online_serving/test_{model_name}_expansion.py<br>
         /tests/e2e/offline_inference/test_{model_name}_expansion.py<br>
         <strong>Performance:</strong><br>
-        /tests/dfx/perf/tests/test.json<br>
+        /tests/dfx/perf/tests/test_omni.json (Omni) and test_tts.json (TTS)<br>
         <strong>Doc Test:</strong><br>
         tests/example/online_serving/test_{model_name}.py<br>
         tests/example/offline_inference/test_{model_name}.py
@@ -530,13 +530,13 @@ L4 level testing is a comprehensive quality audit before a version release. It e
 ### 3.2 Testing Content and Scope
 
 -   ***Full Functionality Testing***: Executes all test cases defined in `test_{model_name}_expansion.py`, covering all implemented features, positive flows, boundary conditions, and exception handling.
--   ***Performance Testing***: Uses the `tests/dfx/perf/tests/test.json` configuration file to drive performance testing tools for stress, load, and endurance tests, collecting metrics like throughput, response time, and resource utilization.
+-   ***Performance Testing***: Uses `tests/dfx/perf/tests/test_omni.json` and `tests/dfx/perf/tests/test_tts.json` (passed to `run_benchmark.py` via `--config-file`) to drive performance testing tools for stress, load, and endurance tests, collecting metrics like throughput, response time, and resource utilization.
 -   ***Documentation Testing***: Verifies whether the example code provided to users is runnable and its results match the description.
 
 ### 3.3 Test Directory and Execution Files
 
 -   ***Functional Testing***: Same directories as L3.
--   ***Performance Test Configuration***: `tests/dfx/perf/tests/test.json`
+-   ***Performance Test Configuration***: `tests/dfx/perf/tests/test_omni.json`, `tests/dfx/perf/tests/test_tts.json`
 -   ***Documentation Example Tests***:
     -   `tests/example/online_serving/test_{model_name}.py`
     -   `tests/example/offline_inference/test_{model_name}.py`

@@ -1,4 +1,4 @@
-When you want to add L4-level ***performance test*** cases, you can refer to the following format for case addition in tests/dfx/perf/tests/test.json:
+When adding L4-level ***performance test*** cases, use the following format in `tests/dfx/perf/tests/test_omni.json` or `tests/dfx/perf/tests/test_tts.json` (the file is selected via `pytest ... run_benchmark.py --config-file <path>`):
 
 ```JSON
 {

@@ -45,7 +45,6 @@ Our test scripts use the pytest framework. First, please use `git clone https://
 === "L3 level & L4 level"
 
     ```bash
-    cd tests
     pytest -s -v -m "advanced_model" --run-level=advanced_model
     ```
     If you only want to run L3 test cases, you can use:
@@ -60,9 +59,9 @@ Our test scripts use the pytest framework. First, please use `git clone https://
     ```bash
     pytest -s -v -m "core_model and distributed_cuda and L4"  --run-level=core_model
     ```
-    Note: To run performance tests, use:
+    Note: To run performance tests (the config defaults to `test_omni.json`; pass `--config-file tests/dfx/perf/tests/test_tts.json` for TTS), use:
     ```bash
-    pytest -s -v perf/scripts/run_benchmark.py
+    pytest -s -v tests/dfx/perf/scripts/run_benchmark.py
     ```
 
     The latest L3 test commands for various test suites can be found in the [pipeline](https://github.com/vllm-project/vllm-omni/blob/main/.buildkite/test-merge.yml).

@@ -1,6 +1,7 @@
 import json
 import os
 import subprocess
+import sys
 import threading
 from datetime import datetime
 from pathlib import Path
@@ -21,9 +22,32 @@
 os.environ["VLLM_TEST_CLEAN_GPU_MEMORY"] = "0"
 
 
-CONFIG_FILE_PATH = str(Path(__file__).parent.parent / "tests" / "test.json")
-BENCHMARK_CONFIGS = load_configs(CONFIG_FILE_PATH)
+def _get_config_file_from_argv() -> str | None:
+    """Read ``--config-file`` from ``sys.argv`` at import time so parametrization can use it.
+
+    Supports ``--config-file path`` and ``--config-file=path`` (same pattern as
+    ``run_diffusion_benchmark.py``).
+
+    Returns:
+        The value of the first matching argument, or ``None`` when no
+        ``--config-file`` argument with a value is present. ``--config-file=``
+        yields an empty string, not ``None``.
+    """
+    for i, arg in enumerate(sys.argv):
+        # Space-separated form: the value is the next argv token. A trailing
+        # ``--config-file`` with nothing after it does not match here.
+        if arg == "--config-file" and i + 1 < len(sys.argv):
+            return sys.argv[i + 1]
+        # ``=`` form: split only once so any later ``=`` stays part of the path.
+        if arg.startswith("--config-file="):
+            return arg.split("=", 1)[1]
+    return None
+
+
+# The ``tests`` directory that sits beside this script's own directory,
+# resolved to an absolute path.
+_PERF_TESTS_DIR = Path(__file__).resolve().parent.parent / "tests"
+# Config file used when ``--config-file`` is not present in argv.
+_DEFAULT_CONFIG_FILE = str(_PERF_TESTS_DIR / "test_omni.json")
+
+# Resolved at import time: the loaded configs are consumed by module-level code.
+CONFIG_FILE_PATH = _get_config_file_from_argv()
+if CONFIG_FILE_PATH is None:
+    # NOTE(review): this message hard-codes the repo-relative default path rather
+    # than printing _DEFAULT_CONFIG_FILE; keep the two in sync if the default changes.
+    print(
+        "No --config-file in argv, using default: tests/dfx/perf/tests/test_omni.json "
+        "(override with e.g. --config-file tests/dfx/perf/tests/test_tts.json)"
+    )
+    CONFIG_FILE_PATH = _DEFAULT_CONFIG_FILE
 
+# Load the benchmark definitions from whichever config file was selected above.
+BENCHMARK_CONFIGS = load_configs(CONFIG_FILE_PATH)
 
 STAGE_CONFIGS_DIR = Path(__file__).parent.parent / "stage_configs"
 test_params = create_unique_server_params(BENCHMARK_CONFIGS, STAGE_CONFIGS_DIR)
@@ -32,6 +56,18 @@
 _omni_server_lock = threading.Lock()
 
 
+def pytest_addoption(parser: pytest.Parser) -> None:
+    """Register the ``--config-file`` command-line option with pytest.
+
+    The option stores a single string value and defaults to ``None``.
+
+    NOTE(review): pytest's documentation says ``pytest_addoption`` should be
+    implemented in a plugin or a ``conftest.py``; confirm this hook is actually
+    picked up from this module, otherwise ``--config-file`` may be rejected as
+    an unrecognized argument.
+    """
+    parser.addoption(
+        "--config-file",
+        action="store",
+        default=None,
+        help=(
+            "Path to Omni/TTS serve benchmark JSON (default: tests/dfx/perf/tests/test_omni.json). "
+            "Example: --config-file tests/dfx/perf/tests/test_tts.json"
+        ),
+    )
+
+
 @pytest.fixture(scope="module")
 def omni_server(request):
     """Start vLLM-Omni server as a subprocess with actual model weights.

@@ -200,37 +200,5 @@
                 }
             }
         ]
-    },
-    {
-        "test_name": "test_qwen3_tts",
-        "server_params": {
-            "model": "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
-        },
-        "benchmark_params": [
-            {
-                "dataset_name": "random",
-                "backend": "openai-audio-speech",
-                "endpoint": "/v1/audio/speech",
-                "num_prompts": [
-                    10,
-                    40
-                ],
-                "max_concurrency": [
-                    1,
-                    4
-                ],
-                "random_input_len": 100,
-                "random_output_len": 100,
-                "extra_body": {
-                    "voice": "Vivian",
-                    "language": "English"
-                },
-                "percentile-metrics": "ttft,e2el,audio_rtf,audio_ttfp,audio_duration",
-                "baseline": {
-                    "mean_audio_ttfp_ms": [6000, 6000],
-                    "mean_audio_rtf": [0.3, 0.3]
-                }
-            }
-        ]
     }
 ]
@@ -0,0 +1,34 @@
+[
+    {
+        "test_name": "test_qwen3_tts",
+        "server_params": {
+            "model": "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
+        },
+        "benchmark_params": [
+            {
+                "dataset_name": "random",
+                "backend": "openai-audio-speech",
+                "endpoint": "/v1/audio/speech",
+                "num_prompts": [
+                    10,
+                    40
+                ],
+                "max_concurrency": [
+                    1,
+                    4
+                ],
+                "random_input_len": 100,
+                "random_output_len": 100,
+                "extra_body": {
+                    "voice": "Vivian",
+                    "language": "English"
+                },
+                "percentile-metrics": "ttft,e2el,audio_rtf,audio_ttfp,audio_duration",
+                "baseline": {
+                    "mean_audio_ttfp_ms": [6000, 6000],
+                    "mean_audio_rtf": [0.3, 0.3]
+                }
+            }
+        ]
+    }
+]