diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index d9a2315953..00823951dc 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -44,11 +44,19 @@ steps:
agents:
queue: "cpu_queue_premerge"
- # L4 Test — main+NIGHTLY=1 (scheduled), or PR with label nightly-test (e.g. add label then Rebuild)
+ # L4 Test — main+NIGHTLY=1 (scheduled), or PR with specific label (e.g. add label then Rebuild)
- label: "Upload Nightly Pipeline"
depends_on: image-build
key: upload-nightly-pipeline
- if: '(build.branch == "main" && build.env("NIGHTLY") == "1") || (build.branch != "main" && build.pull_request.labels includes "nightly-test")'
+ if: >-
+ (build.branch == "main" && build.env("NIGHTLY") == "1") ||
+ (build.branch != "main" && (
+ build.pull_request.labels includes "nightly-test" ||
+ build.pull_request.labels includes "omni-test" ||
+ build.pull_request.labels includes "tts-test" ||
+ build.pull_request.labels includes "diffusion-x2iat-test" ||
+ build.pull_request.labels includes "diffusion-x2v-test"
+ ))
commands:
- buildkite-agent pipeline upload .buildkite/test-nightly.yml
agents:
diff --git a/.buildkite/test-nightly-diffusion.yml b/.buildkite/test-nightly-diffusion.yml
deleted file mode 100644
index b5ba8a117c..0000000000
--- a/.buildkite/test-nightly-diffusion.yml
+++ /dev/null
@@ -1,417 +0,0 @@
-# Nightly diffusion GPU tests — appended to the main nightly build via
-# buildkite-agent pipeline upload .buildkite/test-nightly-diffusion.yml
-# from test-nightly.yml (step key: nightly-diffusion-model-test). Top-level groups are
-# foldable in the Buildkite UI (Other / Wan / Qwen-Image).
-env:
- VLLM_WORKER_MULTIPROC_METHOD: spawn
- HF_HUB_DOWNLOAD_TIMEOUT: 300
- HF_HUB_ETAG_TIMEOUT: 60
-
-steps:
- - group: ":card_index_dividers: Other Model Test"
- key: nightly-other-model-test-group
- steps:
- - label: ":full_moon: Diffusion · Other · Function Test with H100"
- timeout_in_minutes: 120
- # Shared nightly vs PR label conditional; referenced below as *nightly_or_pr_label
- if: &nightly_or_pr_label 'build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"'
- commands:
- - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -k "not test_wan22_expansion and not test_wan_2_1_vace_expansion and not test_qwen_image" -m "advanced_model and diffusion and H100" --run-level "advanced_model"
- agents:
- queue: "mithril-h100-pool"
- plugins:
- - kubernetes:
- podSpec:
- containers:
- - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
- resources:
- limits:
- nvidia.com/gpu: 2
- volumeMounts:
- - name: devshm
- mountPath: /dev/shm
- - name: hf-cache
- mountPath: /root/.cache/huggingface
- env:
- - name: HF_HOME
- value: /root/.cache/huggingface
- - name: HF_TOKEN
- valueFrom:
- secretKeyRef:
- name: hf-token-secret
- key: token
- nodeSelector:
- node.kubernetes.io/instance-type: gpu-h100-sxm
- volumes:
- - name: devshm
- emptyDir:
- medium: Memory
- - name: hf-cache
- hostPath:
- path: /mnt/hf-cache
- type: DirectoryOrCreate
-
- - label: ":full_moon: Diffusion · Other · Function Test with L4"
- timeout_in_minutes: 60
- if: *nightly_or_pr_label
- commands:
- - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and diffusion and L4" --run-level "advanced_model"
- agents:
- queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
- plugins:
- - docker#v5.2.0:
- image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
- always-pull: true
- propagate-environment: true
- shm-size: "8gb"
- environment:
- - "HF_HOME=/fsx/hf_cache"
- - "HF_TOKEN"
- volumes:
- - "/fsx/hf_cache:/fsx/hf_cache"
-
- - label: ":full_moon: Diffusion · Other · Doc Test"
- timeout_in_minutes: 60
- if: *nightly_or_pr_label
- commands:
- - export VLLM_TEST_CLEAN_GPU_MEMORY="1"
- - pytest -s -v tests/examples/online_serving/test_text_to_image.py tests/examples/offline_inference/test_text_to_image.py -m "advanced_model and example and H100" --run-level "advanced_model"
- agents:
- queue: "mithril-h100-pool"
- plugins:
- - kubernetes:
- podSpec:
- containers:
- - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
- resources:
- limits:
- nvidia.com/gpu: 2
- volumeMounts:
- - name: devshm
- mountPath: /dev/shm
- - name: hf-cache
- mountPath: /root/.cache/huggingface
- env:
- - name: HF_HOME
- value: /root/.cache/huggingface
- - name: HF_TOKEN
- valueFrom:
- secretKeyRef:
- name: hf-token-secret
- key: token
- nodeSelector:
- node.kubernetes.io/instance-type: gpu-h100-sxm
- volumes:
- - name: devshm
- emptyDir:
- medium: Memory
- - name: hf-cache
- hostPath:
- path: /mnt/hf-cache
- type: DirectoryOrCreate
-
- - group: ":card_index_dividers: Wan Series Model Test"
- key: nightly-wan-model-test-group
- steps:
- - label: ":full_moon: Diffusion · Wan · Function Test"
- timeout_in_minutes: 90
- if: *nightly_or_pr_label
- commands:
- - pytest -s -v tests/e2e/online_serving/test_wan22_expansion.py tests/e2e/online_serving/test_wan_2_1_vace_expansion.py -m "advanced_model" --run-level "advanced_model"
- agents:
- queue: "mithril-h100-pool"
- plugins:
- - kubernetes:
- podSpec:
- containers:
- - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
- resources:
- limits:
- nvidia.com/gpu: 2
- volumeMounts:
- - name: devshm
- mountPath: /dev/shm
- - name: hf-cache
- mountPath: /root/.cache/huggingface
- env:
- - name: HF_HOME
- value: /root/.cache/huggingface
- - name: HF_TOKEN
- valueFrom:
- secretKeyRef:
- name: hf-token-secret
- key: token
- nodeSelector:
- node.kubernetes.io/instance-type: gpu-h100-sxm
- volumes:
- - name: devshm
- emptyDir:
- medium: Memory
- - name: hf-cache
- hostPath:
- path: /mnt/hf-cache
- type: DirectoryOrCreate
-
- - label: ":full_moon: Diffusion · Wan · Accuracy Test"
- key: nightly-wan22-i2v-accuracy
- timeout_in_minutes: 180
- if: *nightly_or_pr_label
- commands:
- - pytest -s -v tests/e2e/accuracy/wan22_i2v/test_wan22_i2v_video_similarity.py --run-level advanced_model
- agents:
- queue: "mithril-h100-pool"
- plugins:
- - kubernetes:
- podSpec:
- containers:
- - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
- resources:
- limits:
- nvidia.com/gpu: 2
- volumeMounts:
- - name: devshm
- mountPath: /dev/shm
- - name: hf-cache
- mountPath: /root/.cache/huggingface
- env:
- - name: HF_HOME
- value: /root/.cache/huggingface
- - name: HF_TOKEN
- valueFrom:
- secretKeyRef:
- name: hf-token-secret
- key: token
- nodeSelector:
- node.kubernetes.io/instance-type: gpu-h100-sxm
- volumes:
- - name: devshm
- emptyDir:
- medium: Memory
- - name: hf-cache
- hostPath:
- path: /mnt/hf-cache
- type: DirectoryOrCreate
-
- - group: ":card_index_dividers: Qwen-Image Series Model Test"
- key: nightly-qwen-image-edit-group
- steps:
- - label: ":full_moon: Diffusion · Qwen-Image · Function Test with H100"
- timeout_in_minutes: 120
- if: *nightly_or_pr_label
- commands:
- - pytest -s -v tests/e2e/online_serving/test_qwen_image*_expansion.py -m "advanced_model and diffusion and H100" --run-level "advanced_model"
- agents:
- queue: "mithril-h100-pool"
- plugins:
- - kubernetes:
- podSpec:
- containers:
- - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
- resources:
- limits:
- nvidia.com/gpu: 2
- volumeMounts:
- - name: devshm
- mountPath: /dev/shm
- - name: hf-cache
- mountPath: /root/.cache/huggingface
- env:
- - name: HF_HOME
- value: /root/.cache/huggingface
- - name: HF_TOKEN
- valueFrom:
- secretKeyRef:
- name: hf-token-secret
- key: token
- nodeSelector:
- node.kubernetes.io/instance-type: gpu-h100-sxm
- volumes:
- - name: devshm
- emptyDir:
- medium: Memory
- - name: hf-cache
- hostPath:
- path: /mnt/hf-cache
- type: DirectoryOrCreate
-
- - label: ":full_moon: Diffusion · Qwen-Image · GEBench Accuracy Test"
- key: nightly-gebench-accuracy
- timeout_in_minutes: 60
- if: *nightly_or_pr_label
- commands:
- - pytest -s -v tests/e2e/accuracy/test_gebench_h100_smoke.py --run-level advanced_model --gebench-model Qwen/Qwen-Image-2512 --accuracy-judge-model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ --accuracy-gpu 0 --gebench-port 8093 --accuracy-workers 1
- - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gebench_qwen-image-2512/summary*.json"
- agents:
- queue: "mithril-h100-pool"
- plugins:
- - kubernetes:
- podSpec:
- containers:
- - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
- resources:
- limits:
- nvidia.com/gpu: 1
- volumeMounts:
- - name: devshm
- mountPath: /dev/shm
- - name: hf-cache
- mountPath: /root/.cache/huggingface
- env:
- - name: HF_HOME
- value: /root/.cache/huggingface
- - name: HF_TOKEN
- valueFrom:
- secretKeyRef:
- name: hf-token-secret
- key: token
- nodeSelector:
- node.kubernetes.io/instance-type: gpu-h100-sxm
- volumes:
- - name: devshm
- emptyDir:
- medium: Memory
- - name: hf-cache
- hostPath:
- path: /mnt/hf-cache
- type: DirectoryOrCreate
-
- - label: ":full_moon: Diffusion · Qwen-Image · GEdit-Bench Accuracy Test"
- key: nightly-gedit-bench-accuracy
- timeout_in_minutes: 60
- if: *nightly_or_pr_label
- commands:
- - pytest -s -v tests/e2e/accuracy/test_gedit_bench_h100_smoke.py --run-level advanced_model --gedit-model Qwen/Qwen-Image-Edit --accuracy-judge-model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ --accuracy-gpu 0 --gedit-port 8093 --gedit-samples-per-group 20 --accuracy-workers 1
- - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gedit_scores_qwen-image-edit/qwen-image-edit_all_all_vie_score_*.csv"
- - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gedit_scores_qwen-image-edit/qwen-image-edit_all_all_summary_*.json"
- agents:
- queue: "mithril-h100-pool"
- plugins:
- - kubernetes:
- podSpec:
- containers:
- - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
- resources:
- limits:
- nvidia.com/gpu: 1
- volumeMounts:
- - name: devshm
- mountPath: /dev/shm
- - name: hf-cache
- mountPath: /root/.cache/huggingface
- env:
- - name: HF_HOME
- value: /root/.cache/huggingface
- - name: VLLM_HTTP_TIMEOUT_KEEP_ALIVE
- value: "120"
- - name: HF_TOKEN
- valueFrom:
- secretKeyRef:
- name: hf-token-secret
- key: token
- nodeSelector:
- node.kubernetes.io/instance-type: gpu-h100-sxm
- volumes:
- - name: devshm
- emptyDir:
- medium: Memory
- - name: hf-cache
- hostPath:
- path: /mnt/hf-cache
- type: DirectoryOrCreate
-
- - label: ":full_moon: Diffusion · Qwen-Image · Perf Test"
- key: nightly-qwen-image-performance
- timeout_in_minutes: 180
- if: *nightly_or_pr_label
- commands:
- - export DIFFUSION_BENCHMARK_DIR=tests/dfx/perf/results
- - export DIFFUSION_ATTENTION_BACKEND=FLASH_ATTN
- - export CACHE_DIT_VERSION=1.3.0
- # [HACK]: run upload in the same command block as pytest.
- # Because `exit` aborts the entire commands list.
- - |
- set +e
- pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --config-file tests/dfx/perf/tests/test_qwen_image_vllm_omni.json
- EXIT1=$$?
- pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --config-file tests/dfx/perf/tests/test_qwen_image_edit_vllm_omni.json
- EXIT2=$$?
- pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --config-file tests/dfx/perf/tests/test_qwen_image_edit_2509_vllm_omni.json
- EXIT3=$$?
- if [ $$EXIT1 -eq 0 ] || [ $$EXIT2 -eq 0 ] || [ $$EXIT3 -eq 0 ]; then
- buildkite-agent artifact upload "tests/dfx/perf/results/diffusion_result_*.json"
- buildkite-agent artifact upload "tests/dfx/perf/results/logs/*.log"
- fi
- exit $$((EXIT1 | EXIT2 | EXIT3))
- agents:
- queue: "mithril-h100-pool"
- plugins:
- - kubernetes:
- podSpec:
- containers:
- - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
- resources:
- limits:
- nvidia.com/gpu: 4
- volumeMounts:
- - name: devshm
- mountPath: /dev/shm
- - name: hf-cache
- mountPath: /root/.cache/huggingface
- env:
- - name: HF_HOME
- value: /root/.cache/huggingface
- - name: HF_TOKEN
- valueFrom:
- secretKeyRef:
- name: hf-token-secret
- key: token
- nodeSelector:
- node.kubernetes.io/instance-type: gpu-h100-sxm
- volumes:
- - name: devshm
- emptyDir:
- medium: Memory
- - name: hf-cache
- hostPath:
- path: /mnt/hf-cache
- type: DirectoryOrCreate
-
- - label: ":full_moon: Diffusion · Qwen-Image · Accuracy Test"
- key: nightly-qwen-image-accuracy
- timeout_in_minutes: 180
- if: *nightly_or_pr_label
- commands:
- - pytest -s -v tests/e2e/accuracy/test_qwen_image*.py --run-level advanced_model
- agents:
- queue: "mithril-h100-pool"
- plugins:
- - kubernetes:
- podSpec:
- containers:
- - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
- resources:
- limits:
- nvidia.com/gpu: 1
- volumeMounts:
- - name: devshm
- mountPath: /dev/shm
- - name: hf-cache
- mountPath: /root/.cache/huggingface
- env:
- - name: HF_HOME
- value: /root/.cache/huggingface
- - name: HF_TOKEN
- valueFrom:
- secretKeyRef:
- name: hf-token-secret
- key: token
- nodeSelector:
- node.kubernetes.io/instance-type: gpu-h100-sxm
- volumes:
- - name: devshm
- emptyDir:
- medium: Memory
- - name: hf-cache
- hostPath:
- path: /mnt/hf-cache
- type: DirectoryOrCreate
diff --git a/.buildkite/test-nightly.yml b/.buildkite/test-nightly.yml
index 31b3e17976..58e1e55af7 100644
--- a/.buildkite/test-nightly.yml
+++ b/.buildkite/test-nightly.yml
@@ -7,12 +7,11 @@ steps:
# Group: collapses under one heading in the Buildkite UI; child steps still run in parallel.
- group: ":card_index_dividers: Omni Model Test"
key: nightly-omni-test-group
+ depends_on: upload-nightly-pipeline
+ if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" || build.pull_request.labels includes "omni-test"
steps:
- - label: ":full_moon: Omni · Function Test with H100"
+ - label: ":full_moon: Omni · Function Test"
timeout_in_minutes: 90
- depends_on: upload-nightly-pipeline
- # Shared nightly vs PR label conditional; referenced below as *nightly_or_pr_label
- if: &nightly_or_pr_label 'build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"'
commands:
- pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and H100 and omni" --run-level "advanced_model"
agents:
@@ -49,13 +48,11 @@ steps:
path: /mnt/hf-cache
type: DirectoryOrCreate
- - label: ":full_moon: Omni · Function Test with L4"
+ - label: ":full_moon: Omni · Doc Test with L4"
timeout_in_minutes: 90
- depends_on: upload-nightly-pipeline
- if: *nightly_or_pr_label
commands:
- export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1"
- - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and L4 and omni" --run-level "advanced_model"
+ - pytest -s -v tests/examples/ -m "advanced_model and omni and L4" --run-level "advanced_model"
agents:
queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
plugins:
@@ -70,13 +67,203 @@ steps:
volumes:
- "/fsx/hf_cache:/fsx/hf_cache"
- - label: ":full_moon: Omni · Doc Test with L4"
+ - label: ":full_moon: Omni · Doc Test with H100"
+ timeout_in_minutes: 90
+ commands:
+ - pytest -s -v tests/examples/ -m "advanced_model and omni and H100" --run-level "advanced_model"
+ agents:
+ queue: "mithril-h100-pool"
+ plugins:
+ - kubernetes:
+ podSpec:
+ containers:
+ - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+ resources:
+ limits:
+ nvidia.com/gpu: 2
+ volumeMounts:
+ - name: devshm
+ mountPath: /dev/shm
+ - name: hf-cache
+ mountPath: /root/.cache/huggingface
+ env:
+ - name: HF_HOME
+ value: /root/.cache/huggingface
+ - name: HF_TOKEN
+ valueFrom:
+ secretKeyRef:
+ name: hf-token-secret
+ key: token
+ nodeSelector:
+ node.kubernetes.io/instance-type: gpu-h100-sxm
+ volumes:
+ - name: devshm
+ emptyDir:
+ medium: Memory
+ - name: hf-cache
+ hostPath:
+ path: /mnt/hf-cache
+ type: DirectoryOrCreate
+
+ - label: ":full_moon: Omni · Perf Test"
+ key: nightly-omni-performance
+ timeout_in_minutes: 180
+ commands:
+ - export BENCHMARK_DIR=tests/dfx/perf/results
+ - pytest -s -v tests/dfx/perf/scripts/run_benchmark.py --test-config-file tests/dfx/perf/tests/test_qwen_omni.json
+ - buildkite-agent artifact upload "tests/dfx/perf/results/*.json"
+ agents:
+ queue: "mithril-h100-pool"
+ plugins:
+ - kubernetes:
+ podSpec:
+ containers:
+ - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+ resources:
+ limits:
+ nvidia.com/gpu: 2
+ volumeMounts:
+ - name: devshm
+ mountPath: /dev/shm
+ - name: hf-cache
+ mountPath: /root/.cache/huggingface
+ env:
+ - name: HF_HOME
+ value: /root/.cache/huggingface
+ - name: HF_TOKEN
+ valueFrom:
+ secretKeyRef:
+ name: hf-token-secret
+ key: token
+ nodeSelector:
+ node.kubernetes.io/instance-type: gpu-h100-sxm
+ volumes:
+ - name: devshm
+ emptyDir:
+ medium: Memory
+ - name: hf-cache
+ hostPath:
+ path: /mnt/hf-cache
+ type: DirectoryOrCreate
+
+
+ - group: ":card_index_dividers: TTS Model Test"
+ key: nightly-tts-test-group
+ depends_on: upload-nightly-pipeline
+ if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" || build.pull_request.labels includes "tts-test"
+ steps:
+ - label: ":full_moon: TTS · Function Test"
timeout_in_minutes: 90
- depends_on: upload-nightly-pipeline
- if: *nightly_or_pr_label
commands:
- export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1"
- - pytest -s -v tests/examples/ -m "advanced_model and omni and L4" --run-level "advanced_model"
+      - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and L4 and omni" --run-level "advanced_model" # NOTE(review): "omni" marker inside the TTS group — copied from the old Omni·L4 function test; confirm this shouldn't select "tts"-marked tests instead
+ agents:
+        queue: "gpu_1_queue" # single-GPU L4 queue — NOTE(review): previous comment ("g6.12xlarge, 4 L4 GPU") described gpu_4_queue; confirm gpu_1_queue is intended here
+ plugins:
+ - docker#v5.2.0:
+ image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+ always-pull: true
+ propagate-environment: true
+ shm-size: "8gb"
+ environment:
+ - "HF_HOME=/fsx/hf_cache"
+ - "HF_TOKEN"
+ volumes:
+ - "/fsx/hf_cache:/fsx/hf_cache"
+
+ - label: ":full_moon: TTS · Perf Test"
+ key: nightly-tts-performance
+ timeout_in_minutes: 180
+ commands:
+ - export BENCHMARK_DIR=tests/dfx/perf/results
+ - export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1"
+ - pytest -s -v tests/dfx/perf/scripts/run_benchmark.py --test-config-file tests/dfx/perf/tests/test_tts.json
+ - buildkite-agent artifact upload "tests/dfx/perf/results/*.json"
+ agents:
+ queue: "mithril-h100-pool"
+ plugins:
+ - kubernetes:
+ podSpec:
+ containers:
+ - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+ resources:
+ limits:
+ nvidia.com/gpu: 1
+ volumeMounts:
+ - name: devshm
+ mountPath: /dev/shm
+ - name: hf-cache
+ mountPath: /root/.cache/huggingface
+ env:
+ - name: HF_HOME
+ value: /root/.cache/huggingface
+ - name: HF_TOKEN
+ valueFrom:
+ secretKeyRef:
+ name: hf-token-secret
+ key: token
+ nodeSelector:
+ node.kubernetes.io/instance-type: gpu-h100-sxm
+ volumes:
+ - name: devshm
+ emptyDir:
+ medium: Memory
+ - name: hf-cache
+ hostPath:
+ path: /mnt/hf-cache
+ type: DirectoryOrCreate
+
+ # Diffusion X2I suite: x2i / x2a / x2t and related non-video paths; x2v is only in "Diffusion X2V Model Test" below.
+ - group: ":card_index_dividers: Diffusion X2I(&A&T) Model Test"
+ key: nightly-diffusion-x2iat-group
+ depends_on: upload-nightly-pipeline
+ if: >-
+ build.env("NIGHTLY") == "1" ||
+ build.pull_request.labels includes "nightly-test" ||
+ build.pull_request.labels includes "diffusion-x2iat-test"
+ steps:
+ - label: ":full_moon: Diffusion X2I(&A&T) · Function Test with H100"
+ timeout_in_minutes: 120
+ commands:
+ - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -k "not test_wan22_expansion and not test_wan_2_1_vace_expansion and not hunyuan" -m "advanced_model and diffusion and H100" --run-level "advanced_model"
+ agents:
+ queue: "mithril-h100-pool"
+ plugins:
+ - kubernetes:
+ podSpec:
+ containers:
+ - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+ resources:
+ limits:
+ nvidia.com/gpu: 2
+ volumeMounts:
+ - name: devshm
+ mountPath: /dev/shm
+ - name: hf-cache
+ mountPath: /root/.cache/huggingface
+ env:
+ - name: HF_HOME
+ value: /root/.cache/huggingface
+ - name: HF_TOKEN
+ valueFrom:
+ secretKeyRef:
+ name: hf-token-secret
+ key: token
+ nodeSelector:
+ node.kubernetes.io/instance-type: gpu-h100-sxm
+ volumes:
+ - name: devshm
+ emptyDir:
+ medium: Memory
+ - name: hf-cache
+ hostPath:
+ path: /mnt/hf-cache
+ type: DirectoryOrCreate
+
+ - label: ":full_moon: Diffusion X2I(&A&T) · Function Test with L4"
+ timeout_in_minutes: 60
+ commands:
+ - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -k "not test_wan22_expansion and not test_wan_2_1_vace_expansion and not hunyuan" -m "advanced_model and diffusion and L4" --run-level "advanced_model"
agents:
queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
plugins:
@@ -91,12 +278,11 @@ steps:
volumes:
- "/fsx/hf_cache:/fsx/hf_cache"
- - label: ":full_moon: Omni · Doc Test with H100"
- timeout_in_minutes: 90
- depends_on: upload-nightly-pipeline
- if: *nightly_or_pr_label
+ - label: ":full_moon: Diffusion X2I(&A&T) · Doc Test"
+ timeout_in_minutes: 60
commands:
- - pytest -s -v tests/examples/ -m "advanced_model and omni and H100" --run-level "advanced_model"
+ - export VLLM_TEST_CLEAN_GPU_MEMORY="1"
+ - pytest -s -v tests/examples/*/test_text_to_image.py -m "advanced_model and example and H100" --run-level "advanced_model"
agents:
queue: "mithril-h100-pool"
plugins:
@@ -131,16 +317,109 @@ steps:
path: /mnt/hf-cache
type: DirectoryOrCreate
- - label: ":full_moon: Omni · Perf Test"
- key: nightly-omni-performance
+ - label: ":full_moon: Diffusion X2I(&A&T) · GEBench Accuracy Test"
+ timeout_in_minutes: 60
+ commands:
+ - pytest -s -v tests/e2e/accuracy/test_gebench_h100_smoke.py --run-level advanced_model --gebench-model Qwen/Qwen-Image-2512 --accuracy-judge-model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ --accuracy-gpu 0 --gebench-port 8093 --accuracy-workers 1
+ - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gebench_qwen-image-2512/summary*.json"
+ agents:
+ queue: "mithril-h100-pool"
+ plugins:
+ - kubernetes:
+ podSpec:
+ containers:
+ - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+ resources:
+ limits:
+ nvidia.com/gpu: 1
+ volumeMounts:
+ - name: devshm
+ mountPath: /dev/shm
+ - name: hf-cache
+ mountPath: /root/.cache/huggingface
+ env:
+ - name: HF_HOME
+ value: /root/.cache/huggingface
+ - name: HF_TOKEN
+ valueFrom:
+ secretKeyRef:
+ name: hf-token-secret
+ key: token
+ nodeSelector:
+ node.kubernetes.io/instance-type: gpu-h100-sxm
+ volumes:
+ - name: devshm
+ emptyDir:
+ medium: Memory
+ - name: hf-cache
+ hostPath:
+ path: /mnt/hf-cache
+ type: DirectoryOrCreate
+
+ - label: ":full_moon: Diffusion X2I(&A&T) · GEdit-Bench Accuracy Test"
+ timeout_in_minutes: 60
+ commands:
+ - pytest -s -v tests/e2e/accuracy/test_gedit_bench_h100_smoke.py --run-level advanced_model --gedit-model Qwen/Qwen-Image-Edit --accuracy-judge-model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ --accuracy-gpu 0 --gedit-port 8093 --gedit-samples-per-group 20 --accuracy-workers 1
+ - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gedit_scores_qwen-image-edit/qwen-image-edit_all_all_vie_score_*.csv"
+ - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gedit_scores_qwen-image-edit/qwen-image-edit_all_all_summary_*.json"
+ agents:
+ queue: "mithril-h100-pool"
+ plugins:
+ - kubernetes:
+ podSpec:
+ containers:
+ - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+ resources:
+ limits:
+ nvidia.com/gpu: 1
+ volumeMounts:
+ - name: devshm
+ mountPath: /dev/shm
+ - name: hf-cache
+ mountPath: /root/.cache/huggingface
+ env:
+ - name: HF_HOME
+ value: /root/.cache/huggingface
+ - name: VLLM_HTTP_TIMEOUT_KEEP_ALIVE
+ value: "120"
+ - name: HF_TOKEN
+ valueFrom:
+ secretKeyRef:
+ name: hf-token-secret
+ key: token
+ nodeSelector:
+ node.kubernetes.io/instance-type: gpu-h100-sxm
+ volumes:
+ - name: devshm
+ emptyDir:
+ medium: Memory
+ - name: hf-cache
+ hostPath:
+ path: /mnt/hf-cache
+ type: DirectoryOrCreate
+
+ - label: ":full_moon: Diffusion X2I(&A&T) · Perf Test"
+ key: nightly-diffusion-x2iat-performance
timeout_in_minutes: 180
- depends_on: upload-nightly-pipeline
- if: *nightly_or_pr_label
commands:
- - export BENCHMARK_DIR=tests/dfx/perf/results
- - export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1"
- - pytest -s -v tests/dfx/perf/scripts/run_benchmark.py
- - buildkite-agent artifact upload "tests/dfx/perf/results/*.json"
+ - export DIFFUSION_BENCHMARK_DIR=tests/dfx/perf/results
+ - export DIFFUSION_ATTENTION_BACKEND=FLASH_ATTN
+ - export CACHE_DIT_VERSION=1.3.0
+ # [HACK]: run upload in the same command block as pytest.
+ # Because `exit` aborts the entire commands list.
+ - |
+ set +e
+            pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_qwen_image_vllm_omni.json # NOTE(review): deleted test-nightly-diffusion.yml called this script with --config-file; confirm run_diffusion_benchmark.py accepts --test-config-file (applies to the two invocations below too)
+ EXIT1=$$?
+ pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_qwen_image_edit_vllm_omni.json
+ EXIT2=$$?
+ pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_qwen_image_edit_2509_vllm_omni.json
+ EXIT3=$$?
+ if [ $$EXIT1 -eq 0 ] || [ $$EXIT2 -eq 0 ] || [ $$EXIT3 -eq 0 ]; then
+ buildkite-agent artifact upload "tests/dfx/perf/results/diffusion_result_*.json"
+ buildkite-agent artifact upload "tests/dfx/perf/results/logs/*.log"
+ fi
+ exit $$((EXIT1 | EXIT2 | EXIT3))
agents:
queue: "mithril-h100-pool"
plugins:
@@ -150,7 +429,7 @@ steps:
- image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
resources:
limits:
- nvidia.com/gpu: 2
+ nvidia.com/gpu: 4
volumeMounts:
- name: devshm
mountPath: /dev/shm
@@ -175,23 +454,96 @@ steps:
path: /mnt/hf-cache
type: DirectoryOrCreate
- # Dynamically appends steps from test-nightly-diffusion.yml into this build (same mechanism as
- # pipeline.yml → test-ready.yml / test-merge.yml / test-nightly.yml). Foldable groups stay in the
- # uploaded YAML (Other / Wan / Qwen-Image).
- - label: ":card_index_dividers: Diffusion Model Test"
- key: nightly-diffusion-model-test
+ # Diffusion x2v only (Wan, HunyuanVideo, …). x2i/x2a/x2t live in the X2I group above, not here.
+ - group: ":card_index_dividers: Diffusion X2V Model Test"
+ key: nightly-diffusion-x2v-group
depends_on: upload-nightly-pipeline
- if: *nightly_or_pr_label
- commands:
- - buildkite-agent pipeline upload .buildkite/test-nightly-diffusion.yml
- agents:
- queue: "cpu_queue_premerge"
+ if: >-
+ build.env("NIGHTLY") == "1" ||
+ build.pull_request.labels includes "nightly-test" ||
+ build.pull_request.labels includes "diffusion-x2v-test"
+ steps:
+ - label: ":full_moon: Diffusion X2V · Function Test"
+ timeout_in_minutes: 90
+ commands:
+ - pytest -s -v tests/e2e/online_serving/test_wan22_expansion.py tests/e2e/online_serving/test_wan_2_1_vace_expansion.py tests/e2e/online_serving/test_hunyuan_video_15_expansion.py -m "advanced_model" --run-level "advanced_model"
+ agents:
+ queue: "mithril-h100-pool"
+ plugins:
+ - kubernetes:
+ podSpec:
+ containers:
+ - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+ resources:
+ limits:
+ nvidia.com/gpu: 2
+ volumeMounts:
+ - name: devshm
+ mountPath: /dev/shm
+ - name: hf-cache
+ mountPath: /root/.cache/huggingface
+ env:
+ - name: HF_HOME
+ value: /root/.cache/huggingface
+ - name: HF_TOKEN
+ valueFrom:
+ secretKeyRef:
+ name: hf-token-secret
+ key: token
+ nodeSelector:
+ node.kubernetes.io/instance-type: gpu-h100-sxm
+ volumes:
+ - name: devshm
+ emptyDir:
+ medium: Memory
+ - name: hf-cache
+ hostPath:
+ path: /mnt/hf-cache
+ type: DirectoryOrCreate
+
+ - label: ":full_moon: Diffusion X2V · Accuracy Test"
+ timeout_in_minutes: 180
+ commands:
+ - pytest -s -v tests/e2e/accuracy/wan22_i2v/test_wan22_i2v_video_similarity.py --run-level advanced_model
+ agents:
+ queue: "mithril-h100-pool"
+ plugins:
+ - kubernetes:
+ podSpec:
+ containers:
+ - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+ resources:
+ limits:
+ nvidia.com/gpu: 2
+ volumeMounts:
+ - name: devshm
+ mountPath: /dev/shm
+ - name: hf-cache
+ mountPath: /root/.cache/huggingface
+ env:
+ - name: HF_HOME
+ value: /root/.cache/huggingface
+ - name: HF_TOKEN
+ valueFrom:
+ secretKeyRef:
+ name: hf-token-secret
+ key: token
+ nodeSelector:
+ node.kubernetes.io/instance-type: gpu-h100-sxm
+ volumes:
+ - name: devshm
+ emptyDir:
+ medium: Memory
+ - name: hf-cache
+ hostPath:
+ path: /mnt/hf-cache
+ type: DirectoryOrCreate
- label: ":bar_chart: Testcase Statistics"
key: nightly-testcase-statistics
timeout_in_minutes: 120
depends_on: upload-nightly-pipeline
- if: *nightly_or_pr_label
+ if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"
commands:
- python tools/nightly/buildkite_testcase_statistics.py -o tests/dfx/perf/results/buildkite_testcase_statistics.html
- buildkite-agent artifact upload "tests/dfx/perf/results/*.html"
@@ -234,15 +586,17 @@ steps:
key: nightly-perf-distribution
depends_on:
- nightly-omni-performance
- - nightly-qwen-image-performance
+ - nightly-tts-performance
+ - nightly-diffusion-x2iat-performance
- nightly-testcase-statistics
if: build.env("NIGHTLY") == "1"
commands:
- pip install openpyxl
- export DEFAULT_INPUT_DIR=tests/dfx/perf/results
- export DEFAULT_OUTPUT_DIR=tests/dfx/perf/results
+ - buildkite-agent artifact download "tests/dfx/perf/results/*.json" . --step nightly-tts-performance
- buildkite-agent artifact download "tests/dfx/perf/results/*.json" . --step nightly-omni-performance
- - buildkite-agent artifact download "tests/dfx/perf/results/*.json" . --step nightly-qwen-image-performance
+ - buildkite-agent artifact download "tests/dfx/perf/results/*.json" . --step nightly-diffusion-x2iat-performance
- buildkite-agent artifact download "tests/dfx/perf/results/*.html" . --step nightly-testcase-statistics
- python tools/nightly/generate_nightly_perf_excel.py
- python tools/nightly/generate_nightly_perf_html.py
diff --git a/docs/contributing/ci/CI_5levels.md b/docs/contributing/ci/CI_5levels.md
index 9306035738..b0428ddd7d 100644
--- a/docs/contributing/ci/CI_5levels.md
+++ b/docs/contributing/ci/CI_5levels.md
@@ -86,7 +86,8 @@ Through five levels (L1-L5) and common (Common) specifications, the system clari
/tests/e2e/online_serving/test_{model_name}_expansion.py
/tests/e2e/offline_inference/test_{model_name}_expansion.py
Performance:
- /tests/dfx/perf/tests/test.json
+ /tests/dfx/perf/tests/test_qwen_omni.json (Omni), test_tts.json (TTS),
+ and /tests/dfx/perf/tests/test_{diffusion_model}_vllm_omni.json (Diffusion)
Doc Test:
tests/example/online_serving/test_{model_name}.py
tests/example/offline_inference/test_{model_name}.py
@@ -530,13 +531,13 @@ L4 level testing is a comprehensive quality audit before a version release. It e
### 3.2 Testing Content and Scope
- ***Full Functionality Testing***: Executes all test cases defined in `test_{model_name}_expansion.py`, covering all implemented features, positive flows, boundary conditions, and exception handling.
-- ***Performance Testing***: Uses the `tests/dfx/perf/tests/test.json` configuration file to drive performance testing tools for stress, load, and endurance tests, collecting metrics like throughput, response time, and resource utilization.
+- ***Performance Testing***: Uses `tests/dfx/perf/tests/test_qwen_omni.json`, `tests/dfx/perf/tests/test_tts.json`, and diffusion configs in the form `tests/dfx/perf/tests/test_*_vllm_omni.json` (passed to `run_benchmark.py` via `--test-config-file`) to drive performance testing tools for stress, load, and endurance tests, collecting metrics like throughput, response time, and resource utilization.
- ***Documentation Testing***: Verifies whether the example code provided to users is runnable and its results match the description.
### 3.3 Test Directory and Execution Files
- ***Functional Testing***: Same directories as L3.
-- ***Performance Test Configuration***: `tests/dfx/perf/tests/test.json`
+- ***Performance Test Configuration***: `tests/dfx/perf/tests/test_qwen_omni.json`, `tests/dfx/perf/tests/test_tts.json`, and diffusion configs `tests/dfx/perf/tests/test_*_vllm_omni.json` (e.g. `test_qwen_image_vllm_omni.json`)
- ***Documentation Example Tests***:
- - `tests/example/online_serving/test_{model_name}.py`
- `tests/example/offline_inference/test_{model_name}.py`
diff --git a/docs/contributing/ci/test_examples/l4_performance_tests.inc.md b/docs/contributing/ci/test_examples/l4_performance_tests.inc.md
index 8093e1459f..f1f3073dc5 100644
--- a/docs/contributing/ci/test_examples/l4_performance_tests.inc.md
+++ b/docs/contributing/ci/test_examples/l4_performance_tests.inc.md
@@ -1,4 +1,4 @@
-When you want to add L4-level ***performance test*** cases, you can refer to the following format for case addition in tests/dfx/perf/tests/test.json:
+When you want to add L4-level ***performance test*** cases, you can refer to the following format for case addition in `tests/dfx/perf/tests/test_qwen_omni.json`, `tests/dfx/perf/tests/test_tts.json`, or diffusion configs such as `tests/dfx/perf/tests/test_*_vllm_omni.json` (selected via `pytest ... run_benchmark.py --test-config-file <path>`):
```JSON
{
diff --git a/docs/contributing/ci/test_guide.md b/docs/contributing/ci/test_guide.md
index 425f24332c..08b2e3b4ea 100644
--- a/docs/contributing/ci/test_guide.md
+++ b/docs/contributing/ci/test_guide.md
@@ -45,7 +45,6 @@ Our test scripts use the pytest framework. First, please use `git clone https://
=== "L3 level & L4 level"
```bash
- cd tests
pytest -s -v -m "advanced_model" --run-level=advanced_model
```
If you only want to run L3 test case, you can use:
@@ -60,9 +59,9 @@ Our test scripts use the pytest framework. First, please use `git clone https://
```bash
pytest -s -v -m "core_model and distributed_cuda and L4" --run-level=core_model
```
- Note: To run performance tests, use:
+ Note: To run performance tests (defaults to ``test_qwen_omni.json``; use ``--test-config-file tests/dfx/perf/tests/test_tts.json`` for TTS):
```bash
- pytest -s -v perf/scripts/run_benchmark.py
+ pytest -s -v tests/dfx/perf/scripts/run_benchmark.py
```
The latest L3 test commands for various test suites can be found in the [pipeline](https://github.com/vllm-project/vllm-omni/blob/main/.buildkite/test-merge.yml).
diff --git a/tests/dfx/conftest.py b/tests/dfx/conftest.py
index e54141b344..997f25e6e5 100644
--- a/tests/dfx/conftest.py
+++ b/tests/dfx/conftest.py
@@ -2,6 +2,8 @@
from pathlib import Path
from typing import Any
+import pytest
+
from tests.conftest import modify_stage_config
@@ -95,3 +97,13 @@ def create_benchmark_indices(
indices.append((test_name, idx))
return indices
+
+
+def pytest_addoption(parser: pytest.Parser) -> None:
+ """Register shared CLI options for DFX benchmark suites."""
+ parser.addoption(
+ "--test-config-file",
+ action="store",
+ default=None,
+ help=("Path to benchmark config JSON. Example: --test-config-file tests/dfx/perf/tests/test_tts.json"),
+ )
diff --git a/tests/dfx/perf/scripts/run_benchmark.py b/tests/dfx/perf/scripts/run_benchmark.py
index 67dedcd048..d5ef1b49e7 100644
--- a/tests/dfx/perf/scripts/run_benchmark.py
+++ b/tests/dfx/perf/scripts/run_benchmark.py
@@ -21,10 +21,30 @@
os.environ["VLLM_TEST_CLEAN_GPU_MEMORY"] = "0"
-CONFIG_FILE_PATH = str(Path(__file__).parent.parent / "tests" / "test.json")
-BENCHMARK_CONFIGS = load_configs(CONFIG_FILE_PATH)
-STAGE_INIT_TIMEOUT = 600
+def _get_config_file_from_argv() -> str | None:
+ """Read ``--test-config-file`` from ``sys.argv`` at import time so parametrization can use it."""
+ import sys
+
+ for i, arg in enumerate(sys.argv):
+ if arg == "--test-config-file" and i + 1 < len(sys.argv):
+ return sys.argv[i + 1]
+ if arg.startswith("--test-config-file="):
+ return arg.split("=", 1)[1]
+ return None
+
+
+_PERF_TESTS_DIR = Path(__file__).resolve().parent.parent / "tests"
+_DEFAULT_CONFIG_FILE = str(_PERF_TESTS_DIR / "test_qwen_omni.json")
+
+CONFIG_FILE_PATH = _get_config_file_from_argv()
+if CONFIG_FILE_PATH is None:
+ print(
+ "No --test-config-file in argv, using default: tests/dfx/perf/tests/test_qwen_omni.json "
+ "(override with e.g. --test-config-file tests/dfx/perf/tests/test_tts.json)"
+ )
+ CONFIG_FILE_PATH = _DEFAULT_CONFIG_FILE
+BENCHMARK_CONFIGS = load_configs(CONFIG_FILE_PATH)
STAGE_CONFIGS_DIR = Path(__file__).parent.parent / "stage_configs"
test_params = create_unique_server_params(BENCHMARK_CONFIGS, STAGE_CONFIGS_DIR)
@@ -44,7 +64,7 @@ def omni_server(request):
print(f"Starting OmniServer with test: {test_name}, model: {model}")
- server_args = ["--stage-init-timeout", str(STAGE_INIT_TIMEOUT), "--init-timeout", "900"]
+ server_args = ["--stage-init-timeout", "300", "--init-timeout", "900"]
if stage_config_path:
server_args = ["--stage-configs-path", stage_config_path] + server_args
with OmniServer(model, server_args) as server:
@@ -97,8 +117,6 @@ def run_benchmark(
["vllm", "bench", "serve", "--omni"]
+ args
+ [
- "--num-warmups",
- "2",
"--save-result",
"--result-dir",
os.environ.get("BENCHMARK_DIR", "tests"),
@@ -141,7 +159,6 @@ def run_benchmark(
result["random_output_len"] = random_output_len
with open(result_path, "w", encoding="utf-8") as f:
json.dump(result, f, ensure_ascii=False, indent=2)
-
return result
@@ -207,10 +224,6 @@ def _resolve_baseline_value(
f"or request_rate={request_rate!r}; keys={list(baseline_raw.keys())!r}"
)
if isinstance(baseline_raw, (list, tuple)):
- if sweep_index is None:
- raise ValueError("list baseline requires sweep_index")
- if not (0 <= sweep_index < len(baseline_raw)):
- raise IndexError(f"baseline list len={len(baseline_raw)} has no index {sweep_index}")
return baseline_raw[sweep_index]
return baseline_raw
@@ -245,14 +258,14 @@ def assert_result(
) -> None:
assert result["completed"] == num_prompt, "Request failures exist"
baseline_data = params.get("baseline", {})
- thresholds = _baseline_thresholds_for_step(
- baseline_data,
- sweep_index=sweep_index,
- max_concurrency=max_concurrency,
- request_rate=request_rate,
- )
- for metric_name, baseline_value in thresholds.items():
+ for metric_name, baseline_raw in baseline_data.items():
current_value = result[metric_name]
+ baseline_value = _resolve_baseline_value(
+ baseline_raw,
+ sweep_index=sweep_index,
+ max_concurrency=max_concurrency,
+ request_rate=request_rate,
+ )
if "throughput" in metric_name:
if current_value <= baseline_value:
print(
diff --git a/tests/dfx/perf/scripts/run_diffusion_benchmark.py b/tests/dfx/perf/scripts/run_diffusion_benchmark.py
index 123f21405e..8eeeec8df2 100644
--- a/tests/dfx/perf/scripts/run_diffusion_benchmark.py
+++ b/tests/dfx/perf/scripts/run_diffusion_benchmark.py
@@ -5,8 +5,8 @@
- vllm-omni (default): starts DiffusionServer via vllm_omni.entrypoints.cli.main,
benchmarks with diffusion_benchmark_serving.py --backend vllm-omni
-A config JSON file is REQUIRED via --config-file:
- pytest run_diffusion_benchmark.py --config-file tests/dfx/perf/tests/test_qwen_image_vllm_omni.json
+A config JSON file is REQUIRED via --test-config-file:
+ pytest run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_qwen_image_vllm_omni.json
JSON config entries use a "server_type" field, and this runner executes
the vllm-omni path.
@@ -55,16 +55,16 @@
def _get_config_file_from_argv() -> str | None:
- """Read --config-file from sys.argv at import time so pytest parametrize can use it.
+ """Read --test-config-file from sys.argv at import time so pytest parametrize can use it.
pytest_addoption (below) registers the same flag so pytest does not reject it.
- Supports both ``--config-file path`` and ``--config-file=path`` forms.
+ Supports both ``--test-config-file path`` and ``--test-config-file=path`` forms.
Returns None if the flag is not present; callers must handle the missing case.
"""
for i, arg in enumerate(sys.argv):
- if arg == "--config-file" and i + 1 < len(sys.argv):
+ if arg == "--test-config-file" and i + 1 < len(sys.argv):
return sys.argv[i + 1]
- if arg.startswith("--config-file="):
+ if arg.startswith("--test-config-file="):
return arg.split("=", 1)[1]
return None
@@ -133,19 +133,6 @@ def _append_to_aggregated_file(record: dict[str, Any]) -> None:
json.dump(records, f, indent=2, ensure_ascii=False)
-# Register --config-file with pytest so it does not reject the argument.
-def pytest_addoption(parser: pytest.Parser) -> None:
- parser.addoption(
- "--config-file",
- action="store",
- default=None,
- help=(
- "Path to the benchmark config JSON file (required). "
- "Example: --config-file tests/dfx/perf/tests/test_qwen_image_vllm_omni.json"
- ),
- )
-
-
_server_lock = threading.Lock()
# ---------------------------------------------------------------------------
diff --git a/tests/dfx/perf/tests/test.json b/tests/dfx/perf/tests/test_qwen_omni.json
similarity index 92%
rename from tests/dfx/perf/tests/test.json
rename to tests/dfx/perf/tests/test_qwen_omni.json
index 159e27a064..4662f8c0c7 100644
--- a/tests/dfx/perf/tests/test.json
+++ b/tests/dfx/perf/tests/test_qwen_omni.json
@@ -329,37 +329,5 @@
}
}
]
- },
- {
- "test_name": "test_qwen3_tts",
- "server_params": {
- "model": "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
- },
- "benchmark_params": [
- {
- "dataset_name": "random",
- "backend": "openai-audio-speech",
- "endpoint": "/v1/audio/speech",
- "num_prompts": [
- 10,
- 40
- ],
- "max_concurrency": [
- 1,
- 4
- ],
- "random_input_len": 100,
- "random_output_len": 100,
- "extra_body": {
- "voice": "Vivian",
- "language": "English"
- },
- "percentile-metrics": "ttft,e2el,audio_rtf,audio_ttfp,audio_duration",
- "baseline": {
- "mean_audio_ttfp_ms": [6000, 6000],
- "mean_audio_rtf": [0.3, 0.3]
- }
- }
- ]
}
]
diff --git a/tests/dfx/perf/tests/test_tts.json b/tests/dfx/perf/tests/test_tts.json
new file mode 100644
index 0000000000..3583b45b4f
--- /dev/null
+++ b/tests/dfx/perf/tests/test_tts.json
@@ -0,0 +1,34 @@
+[
+ {
+ "test_name": "test_qwen3_tts",
+ "server_params": {
+ "model": "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
+ },
+ "benchmark_params": [
+ {
+ "dataset_name": "random",
+ "backend": "openai-audio-speech",
+ "endpoint": "/v1/audio/speech",
+ "num_prompts": [
+ 10,
+ 40
+ ],
+ "max_concurrency": [
+ 1,
+ 4
+ ],
+ "random_input_len": 100,
+ "random_output_len": 100,
+ "extra_body": {
+ "voice": "Vivian",
+ "language": "English"
+ },
+ "percentile-metrics": "ttft,e2el,audio_rtf,audio_ttfp,audio_duration",
+ "baseline": {
+ "mean_audio_ttfp_ms": [6000, 6000],
+ "mean_audio_rtf": [0.3, 0.3]
+ }
+ }
+ ]
+ }
+]