diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index d9a2315953a..00823951dcc 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -44,11 +44,19 @@ steps: agents: queue: "cpu_queue_premerge" - # L4 Test — main+NIGHTLY=1 (scheduled), or PR with label nightly-test (e.g. add label then Rebuild) + # L4 Test — main+NIGHTLY=1 (scheduled), or PR with specific label (e.g. add label then Rebuild) - label: "Upload Nightly Pipeline" depends_on: image-build key: upload-nightly-pipeline - if: '(build.branch == "main" && build.env("NIGHTLY") == "1") || (build.branch != "main" && build.pull_request.labels includes "nightly-test")' + if: >- + (build.branch == "main" && build.env("NIGHTLY") == "1") || + (build.branch != "main" && ( + build.pull_request.labels includes "nightly-test" || + build.pull_request.labels includes "omni-test" || + build.pull_request.labels includes "tts-test" || + build.pull_request.labels includes "diffusion-x2iat-test" || + build.pull_request.labels includes "diffusion-x2v-test" + )) commands: - buildkite-agent pipeline upload .buildkite/test-nightly.yml agents: diff --git a/.buildkite/test-nightly-diffusion.yml b/.buildkite/test-nightly-diffusion.yml deleted file mode 100644 index 04b99c0a837..00000000000 --- a/.buildkite/test-nightly-diffusion.yml +++ /dev/null @@ -1,364 +0,0 @@ -# Nightly diffusion GPU tests — appended to the main nightly build via -# buildkite-agent pipeline upload .buildkite/test-nightly-diffusion.yml -# from test-nightly.yml (step key: nightly-diffusion-model-test). Top-level groups are -# foldable in the Buildkite UI (Other / Wan / Qwen-Image). 
-env: - VLLM_WORKER_MULTIPROC_METHOD: spawn - HF_HUB_DOWNLOAD_TIMEOUT: 300 - HF_HUB_ETAG_TIMEOUT: 60 - -steps: - - group: ":card_index_dividers: Other Model Test" - key: nightly-other-model-test-group - steps: - - label: ":full_moon: Diffusion · Other · Function Test with H100" - timeout_in_minutes: 120 - # Shared nightly vs PR label conditional; referenced below as *nightly_or_pr_label - if: &nightly_or_pr_label 'build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"' - commands: - - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -k "not test_wan22_expansion and not test_wan_2_1_vace_expansion and not test_qwen_image" -m "advanced_model and diffusion and H100" --run-level "advanced_model" - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 2 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate - - - label: ":full_moon: Diffusion · Other · Function Test with L4" - timeout_in_minutes: 60 - if: *nightly_or_pr_label - commands: - - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and diffusion and L4" --run-level "advanced_model" - agents: - queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU - plugins: - - docker#v5.2.0: - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - always-pull: true - propagate-environment: true - shm-size: "8gb" - environment: - - 
"HF_HOME=/fsx/hf_cache" - - "HF_TOKEN" - volumes: - - "/fsx/hf_cache:/fsx/hf_cache" - - - label: ":full_moon: Diffusion · Other · Doc Test" - timeout_in_minutes: 60 - if: *nightly_or_pr_label - commands: - - export VLLM_TEST_CLEAN_GPU_MEMORY="1" - - pytest -s -v tests/examples/online_serving/test_text_to_image.py tests/examples/offline_inference/test_text_to_image.py -m "advanced_model and example and H100" --run-level "advanced_model" - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 2 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate - - - group: ":card_index_dividers: Wan Series Model Test" - key: nightly-wan-model-test-group - steps: - - label: ":full_moon: Diffusion · Wan · Function Test" - timeout_in_minutes: 90 - if: *nightly_or_pr_label - commands: - - pytest -s -v tests/e2e/online_serving/test_wan22_expansion.py tests/e2e/online_serving/test_wan_2_1_vace_expansion.py -m "advanced_model" --run-level "advanced_model" - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 2 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: 
HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate - - - label: ":full_moon: Diffusion · Wan · Accuracy Test" - key: nightly-wan22-i2v-accuracy - timeout_in_minutes: 180 - if: *nightly_or_pr_label - commands: - - pytest -s -v tests/e2e/accuracy/wan22_i2v/test_wan22_i2v_video_similarity.py --run-level advanced_model - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 2 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate - - - group: ":card_index_dividers: Qwen-Image Series Model Test" - key: nightly-qwen-image-edit-group - steps: - - label: ":full_moon: Diffusion · Qwen-Image · Function Test with H100" - timeout_in_minutes: 120 - if: *nightly_or_pr_label - commands: - - pytest -s -v tests/e2e/online_serving/test_qwen_image*_expansion.py -m "advanced_model and diffusion and H100" --run-level "advanced_model" - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 2 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - 
mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate - - - label: ":full_moon: Diffusion · Qwen-Image · GEBench Accuracy Test" - key: nightly-gebench-accuracy - timeout_in_minutes: 60 - if: *nightly_or_pr_label - commands: - - pytest -s -v tests/e2e/accuracy/test_gebench_h100_smoke.py --run-level advanced_model --gebench-model Qwen/Qwen-Image-2512 --accuracy-judge-model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ --accuracy-gpu 0 --gebench-port 8093 --accuracy-workers 1 - - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gebench_qwen-image-2512/summary*.json" - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 1 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate - - - label: ":full_moon: Diffusion · Qwen-Image · GEdit-Bench Accuracy Test" - key: nightly-gedit-bench-accuracy - timeout_in_minutes: 60 - if: *nightly_or_pr_label - commands: - - pytest -s -v tests/e2e/accuracy/test_gedit_bench_h100_smoke.py --run-level advanced_model --gedit-model Qwen/Qwen-Image-Edit --accuracy-judge-model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ 
--accuracy-gpu 0 --gedit-port 8093 --gedit-samples-per-group 20 --accuracy-workers 1 - - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gedit_scores_qwen-image-edit/qwen-image-edit_all_all_vie_score_*.csv" - - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gedit_scores_qwen-image-edit/qwen-image-edit_all_all_summary_*.json" - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 1 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: VLLM_HTTP_TIMEOUT_KEEP_ALIVE - value: "120" - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate - - - label: ":full_moon: Diffusion · Qwen-Image · Perf Test" - key: nightly-qwen-image-performance - timeout_in_minutes: 180 - if: *nightly_or_pr_label - commands: - - export DIFFUSION_BENCHMARK_DIR=tests/dfx/perf/results - - export CACHE_DIT_VERSION=1.3.0 - - pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --config-file tests/dfx/perf/tests/test_qwen_image_vllm_omni.json - - buildkite-agent artifact upload "tests/dfx/perf/results/benchmark_results_*.json" - - buildkite-agent artifact upload "tests/dfx/perf/results/logs/*.log" - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 4 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - 
name: hf-cache - mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate diff --git a/.buildkite/test-nightly.yml b/.buildkite/test-nightly.yml index 06b7c14ae1d..a36757f3916 100644 --- a/.buildkite/test-nightly.yml +++ b/.buildkite/test-nightly.yml @@ -7,12 +7,11 @@ steps: # Group: collapses under one heading in the Buildkite UI; child steps still run in parallel. - group: ":card_index_dividers: Omni Model Test" key: nightly-omni-test-group + depends_on: upload-nightly-pipeline + if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" || build.pull_request.labels includes "omni-test" steps: - - label: ":full_moon: Omni · Function Test with H100" + - label: ":full_moon: Omni · Function Test" timeout_in_minutes: 90 - depends_on: upload-nightly-pipeline - # Shared nightly vs PR label conditional; referenced below as *nightly_or_pr_label - if: &nightly_or_pr_label 'build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"' commands: - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and H100 and omni" --run-level "advanced_model" agents: @@ -49,13 +48,11 @@ steps: path: /mnt/hf-cache type: DirectoryOrCreate - - label: ":full_moon: Omni · Function Test with L4" + - label: ":full_moon: Omni · Doc Test with L4" timeout_in_minutes: 90 - depends_on: upload-nightly-pipeline - if: *nightly_or_pr_label commands: - export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1" - - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and L4 and omni" --run-level "advanced_model" + - pytest -s -v tests/examples/ -m "advanced_model and omni and L4" --run-level "advanced_model" 
agents: queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU plugins: @@ -70,13 +67,96 @@ steps: volumes: - "/fsx/hf_cache:/fsx/hf_cache" - - label: ":full_moon: Omni · Doc Test with L4" + - label: ":full_moon: Omni · Doc Test with H100" + timeout_in_minutes: 90 + commands: + - pytest -s -v tests/examples/ -m "advanced_model and omni and H100" --run-level "advanced_model" + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 2 + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: /root/.cache/huggingface + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: DirectoryOrCreate + + - label: ":full_moon: Omni · Perf Test" + key: nightly-omni-performance + timeout_in_minutes: 180 + commands: + - export BENCHMARK_DIR=tests/dfx/perf/results + - pytest -s -v tests/dfx/perf/scripts/run_benchmark.py --config-file tests/dfx/perf/tests/test_omni.json + - buildkite-agent artifact upload "tests/dfx/perf/results/*.json" + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 2 + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: /root/.cache/huggingface + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + 
node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: DirectoryOrCreate + + + - group: ":card_index_dividers: TTS Model Test" + key: nightly-tts-test-group + depends_on: upload-nightly-pipeline + if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" || build.pull_request.labels includes "tts-test" + steps: + - label: ":full_moon: TTS · Function Test" timeout_in_minutes: 90 - depends_on: upload-nightly-pipeline - if: *nightly_or_pr_label commands: - export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1" - - pytest -s -v tests/examples/ -m "advanced_model and omni and L4" --run-level "advanced_model" + - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and L4 and omni" --run-level "advanced_model" agents: queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU plugins: @@ -91,12 +171,14 @@ steps: volumes: - "/fsx/hf_cache:/fsx/hf_cache" - - label: ":full_moon: Omni · Doc Test with H100" - timeout_in_minutes: 90 - depends_on: upload-nightly-pipeline - if: *nightly_or_pr_label + - label: ":full_moon: TTS · Perf Test" + key: nightly-tts-performance + timeout_in_minutes: 180 commands: - - pytest -s -v tests/examples/ -m "advanced_model and omni and H100" --run-level "advanced_model" + - export BENCHMARK_DIR=tests/dfx/perf/results + - export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1" + - pytest -s -v tests/dfx/perf/scripts/run_benchmark.py --config-file tests/dfx/perf/tests/test_tts.json + - buildkite-agent artifact upload "tests/dfx/perf/results/*.json" agents: queue: "mithril-h100-pool" plugins: @@ -131,17 +213,19 @@ steps: path: /mnt/hf-cache type: DirectoryOrCreate - - label: ":full_moon: Omni · Perf Test" - key: nightly-omni-performance - timeout_in_minutes: 180 - depends_on: upload-nightly-pipeline - if: *nightly_or_pr_label + # Diffusion X2I suite: x2i / x2a / x2t and related non-video paths; x2v is only in 
"Diffusion X2V Model Test" below. + - group: ":card_index_dividers: Diffusion X2I(&A&T) Model Test" + key: nightly-diffusion-x2iat-group + depends_on: upload-nightly-pipeline + if: >- + build.env("NIGHTLY") == "1" || + build.pull_request.labels includes "nightly-test" || + build.pull_request.labels includes "diffusion-x2iat-test" + steps: + - label: ":full_moon: Diffusion X2I(&A&T) · Function Test with H100" + timeout_in_minutes: 120 commands: - - export BENCHMARK_DIR=tests/dfx/perf/results - - export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1" - - pytest -s -v tests/dfx/perf/scripts/run_benchmark.py - - buildkite-agent artifact upload "tests/dfx/perf/results/*.json" - - buildkite-agent artifact upload "tests/dfx/perf/results/*.html" + - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -k "not test_wan22_expansion and not test_wan_2_1_vace_expansion and not hunyuan" -m "advanced_model and diffusion and H100" --run-level "advanced_model" agents: queue: "mithril-h100-pool" plugins: @@ -176,23 +260,277 @@ steps: path: /mnt/hf-cache type: DirectoryOrCreate - # Dynamically appends steps from test-nightly-diffusion.yml into this build (same mechanism as - # pipeline.yml → test-ready.yml / test-merge.yml / test-nightly.yml). Foldable groups stay in the - # uploaded YAML (Other / Wan / Qwen-Image). 
- - label: ":card_index_dividers: Diffusion Model Test" - key: nightly-diffusion-model-test + - label: ":full_moon: Diffusion X2I(&A&T) · Function Test with L4" + timeout_in_minutes: 60 + commands: + - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -k "not test_wan22_expansion and not test_wan_2_1_vace_expansion and not hunyuan" -m "advanced_model and diffusion and L4" --run-level "advanced_model" + agents: + queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU + plugins: + - docker#v5.2.0: + image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + always-pull: true + propagate-environment: true + shm-size: "8gb" + environment: + - "HF_HOME=/fsx/hf_cache" + - "HF_TOKEN" + volumes: + - "/fsx/hf_cache:/fsx/hf_cache" + + - label: ":full_moon: Diffusion X2I(&A&T) · Doc Test" + timeout_in_minutes: 60 + commands: + - export VLLM_TEST_CLEAN_GPU_MEMORY="1" + - pytest -s -v tests/examples/*/test_text_to_image.py -m "advanced_model and example and H100" --run-level "advanced_model" + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 2 + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: /root/.cache/huggingface + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: DirectoryOrCreate + + - label: ":full_moon: Diffusion X2I(&A&T) · GEBench Accuracy Test" + timeout_in_minutes: 60 + commands: + - pytest -s -v tests/e2e/accuracy/test_gebench_h100_smoke.py --run-level advanced_model --gebench-model Qwen/Qwen-Image-2512 --accuracy-judge-model 
QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ --accuracy-gpu 0 --gebench-port 8093 --accuracy-workers 1 + - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gebench_qwen-image-2512/summary*.json" + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 1 + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: /root/.cache/huggingface + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: DirectoryOrCreate + + - label: ":full_moon: Diffusion X2I(&A&T) · GEdit-Bench Accuracy Test" + timeout_in_minutes: 60 + commands: + - pytest -s -v tests/e2e/accuracy/test_gedit_bench_h100_smoke.py --run-level advanced_model --gedit-model Qwen/Qwen-Image-Edit --accuracy-judge-model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ --accuracy-gpu 0 --gedit-port 8093 --gedit-samples-per-group 20 --accuracy-workers 1 + - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gedit_scores_qwen-image-edit/qwen-image-edit_all_all_vie_score_*.csv" + - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gedit_scores_qwen-image-edit/qwen-image-edit_all_all_summary_*.json" + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 1 + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: 
/root/.cache/huggingface + - name: VLLM_HTTP_TIMEOUT_KEEP_ALIVE + value: "120" + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: DirectoryOrCreate + + - label: ":full_moon: Diffusion X2I(&A&T) · Perf Test" + key: nightly-diffusion-x2iat-performance + timeout_in_minutes: 180 + commands: + - export DIFFUSION_BENCHMARK_DIR=tests/dfx/perf/results + - export CACHE_DIT_VERSION=1.3.0 + - pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --config-file tests/dfx/perf/tests/test_qwen_image_vllm_omni.json + - buildkite-agent artifact upload "tests/dfx/perf/results/benchmark_results_*.json" + - buildkite-agent artifact upload "tests/dfx/perf/results/logs/*.log" + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 4 + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: /root/.cache/huggingface + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: DirectoryOrCreate + + # Diffusion x2v only (Wan, HunyuanVideo, …). x2i/x2a/x2t live in the X2I group above, not here. 
+ - group: ":card_index_dividers: Diffusion X2V Model Test" + key: nightly-diffusion-x2v-group depends_on: upload-nightly-pipeline - if: *nightly_or_pr_label - commands: - - buildkite-agent pipeline upload .buildkite/test-nightly-diffusion.yml - agents: - queue: "cpu_queue_premerge" + if: >- + build.env("NIGHTLY") == "1" || + build.pull_request.labels includes "nightly-test" || + build.pull_request.labels includes "diffusion-x2v-test" + steps: + - label: ":full_moon: Diffusion X2V · Function Test" + timeout_in_minutes: 90 + commands: + - pytest -s -v tests/e2e/online_serving/test_wan22_expansion.py tests/e2e/online_serving/test_wan_2_1_vace_expansion.py tests/e2e/online_serving/test_hunyuan_video_15_expansion.py -m "advanced_model" --run-level "advanced_model" + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 2 + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: /root/.cache/huggingface + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: DirectoryOrCreate + + - label: ":full_moon: Diffusion X2V · Accuracy Test" + timeout_in_minutes: 180 + commands: + - pytest -s -v tests/e2e/accuracy/wan22_i2v/test_wan22_i2v_video_similarity.py --run-level advanced_model + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 2 + volumeMounts: + - name: devshm + 
mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: /root/.cache/huggingface + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: DirectoryOrCreate - label: ":bar_chart: Testcase Statistics" key: nightly-testcase-statistics timeout_in_minutes: 120 depends_on: upload-nightly-pipeline - if: *nightly_or_pr_label + if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - python tools/nightly/buildkite_testcase_statistics.py -o tests/dfx/perf/results/buildkite_testcase_statistics.html - buildkite-agent artifact upload "tests/dfx/perf/results/*.html" @@ -235,16 +573,18 @@ steps: key: nightly-perf-distribution depends_on: - nightly-omni-performance - - nightly-qwen-image-performance + - nightly-tts-performance + - nightly-diffusion-x2iat-performance - nightly-testcase-statistics if: build.env("NIGHTLY") == "1" commands: - pip install openpyxl - export DEFAULT_INPUT_DIR=tests/dfx/perf/results - export DEFAULT_OUTPUT_DIR=tests/dfx/perf/results - - buildkite-agent artifact download "tests/dfx/perf/results/*.json" . --step nightly-omni-performance - - buildkite-agent artifact download "tests/dfx/perf/results/*.json" . --step nightly-qwen-image-performance - - buildkite-agent artifact download "tests/dfx/perf/results/*.html" . --step nightly-omni-performance + - buildkite-agent artifact download "tests/dfx/perf/results/*.json" . --step nightly-tts-performance + - buildkite-agent artifact download "tests/dfx/perf/results/*.json" . --step nightly-omni-performance + - buildkite-agent artifact download "tests/dfx/perf/results/*.json" . --step nightly-diffusion-x2iat-performance + - buildkite-agent artifact download "tests/dfx/perf/results/*.html" . 
--step nightly-testcase-statistics - python tools/nightly/generate_nightly_perf_excel.py - python tools/nightly/generate_nightly_perf_html.py - python tools/nightly/send_nightly_email.py --report-file "tests/dfx/perf/results/*.xlsx, tests/dfx/perf/results/*.html" diff --git a/docs/contributing/ci/CI_5levels.md b/docs/contributing/ci/CI_5levels.md index 74ae1a38eb8..44802fba124 100644 --- a/docs/contributing/ci/CI_5levels.md +++ b/docs/contributing/ci/CI_5levels.md @@ -86,7 +86,7 @@ Through five levels (L1-L5) and common (Common) specifications, the system clari /tests/e2e/online_serving/test_{model_name}_expansion.py
/tests/e2e/offline_inference/test_{model_name}_expansion.py
Performance:
- /tests/dfx/perf/tests/test.json
+ /tests/dfx/perf/tests/test_omni.json (Omni) and /tests/dfx/perf/tests/test_tts.json (TTS)
Doc Test:
tests/example/online_serving/test_{model_name}.py
tests/example/offline_inference/test_{model_name}.py @@ -530,13 +530,13 @@ L4 level testing is a comprehensive quality audit before a version release. It e ### 3.2 Testing Content and Scope - ***Full Functionality Testing***: Executes all test cases defined in `test_{model_name}_expansion.py`, covering all implemented features, positive flows, boundary conditions, and exception handling. -- ***Performance Testing***: Uses the `tests/dfx/perf/tests/test.json` configuration file to drive performance testing tools for stress, load, and endurance tests, collecting metrics like throughput, response time, and resource utilization. +- ***Performance Testing***: Uses `tests/dfx/perf/tests/test_omni.json` and `test_tts.json` (passed to `run_benchmark.py` via `--config-file`) to drive performance testing tools for stress, load, and endurance tests, collecting metrics like throughput, response time, and resource utilization. - ***Documentation Testing***: Verifies whether the example code provided to users is runnable and its results match the description. ### 3.3 Test Directory and Execution Files - ***Functional Testing***: Same directories as L3. 
-- ***Performance Test Configuration***: `tests/dfx/perf/tests/test.json` +- ***Performance Test Configuration***: `tests/dfx/perf/tests/test_omni.json`, `tests/dfx/perf/tests/test_tts.json` - ***Documentation Example Tests***: - - `tests/example/online_serving/test_{model_name}.py` - `tests/example/offline_inference/test_{model_name}.py` diff --git a/docs/contributing/ci/test_examples/l4_performance_tests.inc.md b/docs/contributing/ci/test_examples/l4_performance_tests.inc.md index 8093e1459f5..d1ff8d5d5bb 100644 --- a/docs/contributing/ci/test_examples/l4_performance_tests.inc.md +++ b/docs/contributing/ci/test_examples/l4_performance_tests.inc.md @@ -1,4 +1,4 @@ -When you want to add L4-level ***performance test*** cases, you can refer to the following format for case addition in tests/dfx/perf/tests/test.json: +When you want to add L4-level ***performance test*** cases, you can refer to the following format for case addition in `tests/dfx/perf/tests/test_omni.json` or `tests/dfx/perf/tests/test_tts.json` (selected via `pytest ... run_benchmark.py --config-file <path>`): ```JSON { diff --git a/docs/contributing/ci/test_guide.md b/docs/contributing/ci/test_guide.md index 425f24332c2..a3e95366a36 100644 --- a/docs/contributing/ci/test_guide.md +++ b/docs/contributing/ci/test_guide.md @@ -45,7 +45,6 @@ Our test scripts use the pytest framework. First, please use `git clone https:// === "L3 level & L4 level" ```bash - cd tests pytest -s -v -m "advanced_model" --run-level=advanced_model ``` If you only want to run L3 test case, you can use: @@ -60,9 +59,9 @@ Our test scripts use the pytest framework. 
First, please use `git clone https:// ```bash pytest -s -v -m "core_model and distributed_cuda and L4" --run-level=core_model ``` - Note: To run performance tests, use: + Note: To run performance tests (defaults to ``test_omni.json``; use ``--config-file tests/dfx/perf/tests/test_tts.json`` for TTS): ```bash - pytest -s -v perf/scripts/run_benchmark.py + pytest -s -v tests/dfx/perf/scripts/run_benchmark.py ``` The latest L3 test commands for various test suites can be found in the [pipeline](https://github.com/vllm-project/vllm-omni/blob/main/.buildkite/test-merge.yml). diff --git a/tests/dfx/perf/scripts/run_benchmark.py b/tests/dfx/perf/scripts/run_benchmark.py index c625239e5c2..e07cc67c09f 100644 --- a/tests/dfx/perf/scripts/run_benchmark.py +++ b/tests/dfx/perf/scripts/run_benchmark.py @@ -1,6 +1,7 @@ import json import os import subprocess +import sys import threading from datetime import datetime from pathlib import Path @@ -21,9 +22,32 @@ os.environ["VLLM_TEST_CLEAN_GPU_MEMORY"] = "0" -CONFIG_FILE_PATH = str(Path(__file__).parent.parent / "tests" / "test.json") -BENCHMARK_CONFIGS = load_configs(CONFIG_FILE_PATH) +def _get_config_file_from_argv() -> str | None: + """Read ``--config-file`` from ``sys.argv`` at import time so parametrization can use it. + + Supports ``--config-file path`` and ``--config-file=path`` (same pattern as + ``run_diffusion_benchmark.py``). + """ + for i, arg in enumerate(sys.argv): + if arg == "--config-file" and i + 1 < len(sys.argv): + return sys.argv[i + 1] + if arg.startswith("--config-file="): + return arg.split("=", 1)[1] + return None + + +_PERF_TESTS_DIR = Path(__file__).resolve().parent.parent / "tests" +_DEFAULT_CONFIG_FILE = str(_PERF_TESTS_DIR / "test_omni.json") + +CONFIG_FILE_PATH = _get_config_file_from_argv() +if CONFIG_FILE_PATH is None: + print( + "No --config-file in argv, using default: tests/dfx/perf/tests/test_omni.json " + "(override with e.g. 
--config-file tests/dfx/perf/tests/test_tts.json)" + ) + CONFIG_FILE_PATH = _DEFAULT_CONFIG_FILE +BENCHMARK_CONFIGS = load_configs(CONFIG_FILE_PATH) STAGE_CONFIGS_DIR = Path(__file__).parent.parent / "stage_configs" test_params = create_unique_server_params(BENCHMARK_CONFIGS, STAGE_CONFIGS_DIR) @@ -32,6 +56,18 @@ _omni_server_lock = threading.Lock() +def pytest_addoption(parser: pytest.Parser) -> None: + parser.addoption( + "--config-file", + action="store", + default=None, + help=( + "Path to Omni/TTS serve benchmark JSON (default: tests/dfx/perf/tests/test_omni.json). " + "Example: --config-file tests/dfx/perf/tests/test_tts.json" + ), + ) + + @pytest.fixture(scope="module") def omni_server(request): """Start vLLM-Omni server as a subprocess with actual model weights. diff --git a/tests/dfx/perf/tests/test.json b/tests/dfx/perf/tests/test_omni.json similarity index 87% rename from tests/dfx/perf/tests/test.json rename to tests/dfx/perf/tests/test_omni.json index fe7e3804698..0bb08a839cb 100644 --- a/tests/dfx/perf/tests/test.json +++ b/tests/dfx/perf/tests/test_omni.json @@ -200,37 +200,5 @@ } } ] - }, - { - "test_name": "test_qwen3_tts", - "server_params": { - "model": "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice" - }, - "benchmark_params": [ - { - "dataset_name": "random", - "backend": "openai-audio-speech", - "endpoint": "/v1/audio/speech", - "num_prompts": [ - 10, - 40 - ], - "max_concurrency": [ - 1, - 4 - ], - "random_input_len": 100, - "random_output_len": 100, - "extra_body": { - "voice": "Vivian", - "language": "English" - }, - "percentile-metrics": "ttft,e2el,audio_rtf,audio_ttfp,audio_duration", - "baseline": { - "mean_audio_ttfp_ms": [6000, 6000], - "mean_audio_rtf": [0.3, 0.3] - } - } - ] } ] diff --git a/tests/dfx/perf/tests/test_tts.json b/tests/dfx/perf/tests/test_tts.json new file mode 100644 index 00000000000..3583b45b4f2 --- /dev/null +++ b/tests/dfx/perf/tests/test_tts.json @@ -0,0 +1,34 @@ +[ + { + "test_name": "test_qwen3_tts", + "server_params": { 
+ "model": "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice" + }, + "benchmark_params": [ + { + "dataset_name": "random", + "backend": "openai-audio-speech", + "endpoint": "/v1/audio/speech", + "num_prompts": [ + 10, + 40 + ], + "max_concurrency": [ + 1, + 4 + ], + "random_input_len": 100, + "random_output_len": 100, + "extra_body": { + "voice": "Vivian", + "language": "English" + }, + "percentile-metrics": "ttft,e2el,audio_rtf,audio_ttfp,audio_duration", + "baseline": { + "mean_audio_ttfp_ms": [6000, 6000], + "mean_audio_rtf": [0.3, 0.3] + } + } + ] + } +]