diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index d9a2315953..00823951dc 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -44,11 +44,19 @@ steps: agents: queue: "cpu_queue_premerge" - # L4 Test — main+NIGHTLY=1 (scheduled), or PR with label nightly-test (e.g. add label then Rebuild) + # L4 Test — main+NIGHTLY=1 (scheduled), or PR with specific label (e.g. add label then Rebuild) - label: "Upload Nightly Pipeline" depends_on: image-build key: upload-nightly-pipeline - if: '(build.branch == "main" && build.env("NIGHTLY") == "1") || (build.branch != "main" && build.pull_request.labels includes "nightly-test")' + if: >- + (build.branch == "main" && build.env("NIGHTLY") == "1") || + (build.branch != "main" && ( + build.pull_request.labels includes "nightly-test" || + build.pull_request.labels includes "omni-test" || + build.pull_request.labels includes "tts-test" || + build.pull_request.labels includes "diffusion-x2iat-test" || + build.pull_request.labels includes "diffusion-x2v-test" + )) commands: - buildkite-agent pipeline upload .buildkite/test-nightly.yml agents: diff --git a/.buildkite/test-nightly-diffusion.yml b/.buildkite/test-nightly-diffusion.yml deleted file mode 100644 index b5ba8a117c..0000000000 --- a/.buildkite/test-nightly-diffusion.yml +++ /dev/null @@ -1,417 +0,0 @@ -# Nightly diffusion GPU tests — appended to the main nightly build via -# buildkite-agent pipeline upload .buildkite/test-nightly-diffusion.yml -# from test-nightly.yml (step key: nightly-diffusion-model-test). Top-level groups are -# foldable in the Buildkite UI (Other / Wan / Qwen-Image). 
-env: - VLLM_WORKER_MULTIPROC_METHOD: spawn - HF_HUB_DOWNLOAD_TIMEOUT: 300 - HF_HUB_ETAG_TIMEOUT: 60 - -steps: - - group: ":card_index_dividers: Other Model Test" - key: nightly-other-model-test-group - steps: - - label: ":full_moon: Diffusion · Other · Function Test with H100" - timeout_in_minutes: 120 - # Shared nightly vs PR label conditional; referenced below as *nightly_or_pr_label - if: &nightly_or_pr_label 'build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"' - commands: - - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -k "not test_wan22_expansion and not test_wan_2_1_vace_expansion and not test_qwen_image" -m "advanced_model and diffusion and H100" --run-level "advanced_model" - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 2 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate - - - label: ":full_moon: Diffusion · Other · Function Test with L4" - timeout_in_minutes: 60 - if: *nightly_or_pr_label - commands: - - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and diffusion and L4" --run-level "advanced_model" - agents: - queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU - plugins: - - docker#v5.2.0: - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - always-pull: true - propagate-environment: true - shm-size: "8gb" - environment: - - 
"HF_HOME=/fsx/hf_cache" - - "HF_TOKEN" - volumes: - - "/fsx/hf_cache:/fsx/hf_cache" - - - label: ":full_moon: Diffusion · Other · Doc Test" - timeout_in_minutes: 60 - if: *nightly_or_pr_label - commands: - - export VLLM_TEST_CLEAN_GPU_MEMORY="1" - - pytest -s -v tests/examples/online_serving/test_text_to_image.py tests/examples/offline_inference/test_text_to_image.py -m "advanced_model and example and H100" --run-level "advanced_model" - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 2 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate - - - group: ":card_index_dividers: Wan Series Model Test" - key: nightly-wan-model-test-group - steps: - - label: ":full_moon: Diffusion · Wan · Function Test" - timeout_in_minutes: 90 - if: *nightly_or_pr_label - commands: - - pytest -s -v tests/e2e/online_serving/test_wan22_expansion.py tests/e2e/online_serving/test_wan_2_1_vace_expansion.py -m "advanced_model" --run-level "advanced_model" - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 2 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: 
HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate - - - label: ":full_moon: Diffusion · Wan · Accuracy Test" - key: nightly-wan22-i2v-accuracy - timeout_in_minutes: 180 - if: *nightly_or_pr_label - commands: - - pytest -s -v tests/e2e/accuracy/wan22_i2v/test_wan22_i2v_video_similarity.py --run-level advanced_model - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 2 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate - - - group: ":card_index_dividers: Qwen-Image Series Model Test" - key: nightly-qwen-image-edit-group - steps: - - label: ":full_moon: Diffusion · Qwen-Image · Function Test with H100" - timeout_in_minutes: 120 - if: *nightly_or_pr_label - commands: - - pytest -s -v tests/e2e/online_serving/test_qwen_image*_expansion.py -m "advanced_model and diffusion and H100" --run-level "advanced_model" - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 2 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - 
mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate - - - label: ":full_moon: Diffusion · Qwen-Image · GEBench Accuracy Test" - key: nightly-gebench-accuracy - timeout_in_minutes: 60 - if: *nightly_or_pr_label - commands: - - pytest -s -v tests/e2e/accuracy/test_gebench_h100_smoke.py --run-level advanced_model --gebench-model Qwen/Qwen-Image-2512 --accuracy-judge-model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ --accuracy-gpu 0 --gebench-port 8093 --accuracy-workers 1 - - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gebench_qwen-image-2512/summary*.json" - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 1 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate - - - label: ":full_moon: Diffusion · Qwen-Image · GEdit-Bench Accuracy Test" - key: nightly-gedit-bench-accuracy - timeout_in_minutes: 60 - if: *nightly_or_pr_label - commands: - - pytest -s -v tests/e2e/accuracy/test_gedit_bench_h100_smoke.py --run-level advanced_model --gedit-model Qwen/Qwen-Image-Edit --accuracy-judge-model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ 
--accuracy-gpu 0 --gedit-port 8093 --gedit-samples-per-group 20 --accuracy-workers 1 - - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gedit_scores_qwen-image-edit/qwen-image-edit_all_all_vie_score_*.csv" - - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gedit_scores_qwen-image-edit/qwen-image-edit_all_all_summary_*.json" - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 1 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: VLLM_HTTP_TIMEOUT_KEEP_ALIVE - value: "120" - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate - - - label: ":full_moon: Diffusion · Qwen-Image · Perf Test" - key: nightly-qwen-image-performance - timeout_in_minutes: 180 - if: *nightly_or_pr_label - commands: - - export DIFFUSION_BENCHMARK_DIR=tests/dfx/perf/results - - export DIFFUSION_ATTENTION_BACKEND=FLASH_ATTN - - export CACHE_DIT_VERSION=1.3.0 - # [HACK]: run upload in the same command block as pytest. - # Because `exit` aborts the entire commands list. - - | - set +e - pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --config-file tests/dfx/perf/tests/test_qwen_image_vllm_omni.json - EXIT1=$$? - pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --config-file tests/dfx/perf/tests/test_qwen_image_edit_vllm_omni.json - EXIT2=$$? 
- pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --config-file tests/dfx/perf/tests/test_qwen_image_edit_2509_vllm_omni.json - EXIT3=$$? - if [ $$EXIT1 -eq 0 ] || [ $$EXIT2 -eq 0 ] || [ $$EXIT3 -eq 0 ]; then - buildkite-agent artifact upload "tests/dfx/perf/results/diffusion_result_*.json" - buildkite-agent artifact upload "tests/dfx/perf/results/logs/*.log" - fi - exit $$((EXIT1 | EXIT2 | EXIT3)) - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 4 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate - - - label: ":full_moon: Diffusion · Qwen-Image · Accuracy Test" - key: nightly-qwen-image-accuracy - timeout_in_minutes: 180 - if: *nightly_or_pr_label - commands: - - pytest -s -v tests/e2e/accuracy/test_qwen_image*.py --run-level advanced_model - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 1 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - 
emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate diff --git a/.buildkite/test-nightly.yml b/.buildkite/test-nightly.yml index 31b3e17976..58e1e55af7 100644 --- a/.buildkite/test-nightly.yml +++ b/.buildkite/test-nightly.yml @@ -7,12 +7,11 @@ steps: # Group: collapses under one heading in the Buildkite UI; child steps still run in parallel. - group: ":card_index_dividers: Omni Model Test" key: nightly-omni-test-group + depends_on: upload-nightly-pipeline + if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" || build.pull_request.labels includes "omni-test" steps: - - label: ":full_moon: Omni · Function Test with H100" + - label: ":full_moon: Omni · Function Test" timeout_in_minutes: 90 - depends_on: upload-nightly-pipeline - # Shared nightly vs PR label conditional; referenced below as *nightly_or_pr_label - if: &nightly_or_pr_label 'build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"' commands: - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and H100 and omni" --run-level "advanced_model" agents: @@ -49,13 +48,11 @@ steps: path: /mnt/hf-cache type: DirectoryOrCreate - - label: ":full_moon: Omni · Function Test with L4" + - label: ":full_moon: Omni · Doc Test with L4" timeout_in_minutes: 90 - depends_on: upload-nightly-pipeline - if: *nightly_or_pr_label commands: - export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1" - - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and L4 and omni" --run-level "advanced_model" + - pytest -s -v tests/examples/ -m "advanced_model and omni and L4" --run-level "advanced_model" agents: queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU plugins: @@ -70,13 +67,203 @@ steps: volumes: - "/fsx/hf_cache:/fsx/hf_cache" - - label: ":full_moon: Omni · Doc Test with L4" + - label: ":full_moon: Omni · Doc Test with H100" + timeout_in_minutes: 90 + commands: + - 
pytest -s -v tests/examples/ -m "advanced_model and omni and H100" --run-level "advanced_model" + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 2 + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: /root/.cache/huggingface + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: DirectoryOrCreate + + - label: ":full_moon: Omni · Perf Test" + key: nightly-omni-performance + timeout_in_minutes: 180 + commands: + - export BENCHMARK_DIR=tests/dfx/perf/results + - pytest -s -v tests/dfx/perf/scripts/run_benchmark.py --test-config-file tests/dfx/perf/tests/test_qwen_omni.json + - buildkite-agent artifact upload "tests/dfx/perf/results/*.json" + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 2 + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: /root/.cache/huggingface + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: DirectoryOrCreate + + + - group: ":card_index_dividers: TTS Model Test" + key: nightly-tts-test-group + depends_on: 
upload-nightly-pipeline + if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" || build.pull_request.labels includes "tts-test" + steps: + - label: ":full_moon: TTS · Function Test" timeout_in_minutes: 90 - depends_on: upload-nightly-pipeline - if: *nightly_or_pr_label commands: - export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1" - - pytest -s -v tests/examples/ -m "advanced_model and omni and L4" --run-level "advanced_model" + - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and L4 and omni" --run-level "advanced_model" + agents: + queue: "gpu_1_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU + plugins: + - docker#v5.2.0: + image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + always-pull: true + propagate-environment: true + shm-size: "8gb" + environment: + - "HF_HOME=/fsx/hf_cache" + - "HF_TOKEN" + volumes: + - "/fsx/hf_cache:/fsx/hf_cache" + + - label: ":full_moon: TTS · Perf Test" + key: nightly-tts-performance + timeout_in_minutes: 180 + commands: + - export BENCHMARK_DIR=tests/dfx/perf/results + - export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1" + - pytest -s -v tests/dfx/perf/scripts/run_benchmark.py --test-config-file tests/dfx/perf/tests/test_tts.json + - buildkite-agent artifact upload "tests/dfx/perf/results/*.json" + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 1 + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: /root/.cache/huggingface + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: 
/mnt/hf-cache + type: DirectoryOrCreate + + # Diffusion X2I suite: x2i / x2a / x2t and related non-video paths; x2v is only in "Diffusion X2V Model Test" below. + - group: ":card_index_dividers: Diffusion X2I(&A&T) Model Test" + key: nightly-diffusion-x2iat-group + depends_on: upload-nightly-pipeline + if: >- + build.env("NIGHTLY") == "1" || + build.pull_request.labels includes "nightly-test" || + build.pull_request.labels includes "diffusion-x2iat-test" + steps: + - label: ":full_moon: Diffusion X2I(&A&T) · Function Test with H100" + timeout_in_minutes: 120 + commands: + - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -k "not test_wan22_expansion and not test_wan_2_1_vace_expansion and not hunyuan" -m "advanced_model and diffusion and H100" --run-level "advanced_model" + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 2 + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: /root/.cache/huggingface + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: DirectoryOrCreate + + - label: ":full_moon: Diffusion X2I(&A&T) · Function Test with L4" + timeout_in_minutes: 60 + commands: + - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -k "not test_wan22_expansion and not test_wan_2_1_vace_expansion and not hunyuan" -m "advanced_model and diffusion and L4" --run-level "advanced_model" agents: queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU plugins: @@ -91,12 +278,11 @@ steps: volumes: - "/fsx/hf_cache:/fsx/hf_cache" - - label: 
":full_moon: Omni · Doc Test with H100" - timeout_in_minutes: 90 - depends_on: upload-nightly-pipeline - if: *nightly_or_pr_label + - label: ":full_moon: Diffusion X2I(&A&T) · Doc Test" + timeout_in_minutes: 60 commands: - - pytest -s -v tests/examples/ -m "advanced_model and omni and H100" --run-level "advanced_model" + - export VLLM_TEST_CLEAN_GPU_MEMORY="1" + - pytest -s -v tests/examples/*/test_text_to_image.py -m "advanced_model and example and H100" --run-level "advanced_model" agents: queue: "mithril-h100-pool" plugins: @@ -131,16 +317,109 @@ steps: path: /mnt/hf-cache type: DirectoryOrCreate - - label: ":full_moon: Omni · Perf Test" - key: nightly-omni-performance + - label: ":full_moon: Diffusion X2I(&A&T) · GEBench Accuracy Test" + timeout_in_minutes: 60 + commands: + - pytest -s -v tests/e2e/accuracy/test_gebench_h100_smoke.py --run-level advanced_model --gebench-model Qwen/Qwen-Image-2512 --accuracy-judge-model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ --accuracy-gpu 0 --gebench-port 8093 --accuracy-workers 1 + - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gebench_qwen-image-2512/summary*.json" + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 1 + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: /root/.cache/huggingface + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: DirectoryOrCreate + + - label: ":full_moon: Diffusion X2I(&A&T) · GEdit-Bench Accuracy Test" + timeout_in_minutes: 60 + commands: + - pytest -s -v 
tests/e2e/accuracy/test_gedit_bench_h100_smoke.py --run-level advanced_model --gedit-model Qwen/Qwen-Image-Edit --accuracy-judge-model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ --accuracy-gpu 0 --gedit-port 8093 --gedit-samples-per-group 20 --accuracy-workers 1 + - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gedit_scores_qwen-image-edit/qwen-image-edit_all_all_vie_score_*.csv" + - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gedit_scores_qwen-image-edit/qwen-image-edit_all_all_summary_*.json" + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 1 + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: /root/.cache/huggingface + - name: VLLM_HTTP_TIMEOUT_KEEP_ALIVE + value: "120" + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: DirectoryOrCreate + + - label: ":full_moon: Diffusion X2I(&A&T) · Perf Test" + key: nightly-diffusion-x2iat-performance timeout_in_minutes: 180 - depends_on: upload-nightly-pipeline - if: *nightly_or_pr_label commands: - - export BENCHMARK_DIR=tests/dfx/perf/results - - export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1" - - pytest -s -v tests/dfx/perf/scripts/run_benchmark.py - - buildkite-agent artifact upload "tests/dfx/perf/results/*.json" + - export DIFFUSION_BENCHMARK_DIR=tests/dfx/perf/results + - export DIFFUSION_ATTENTION_BACKEND=FLASH_ATTN + - export CACHE_DIT_VERSION=1.3.0 + # [HACK]: run upload in the same command block as pytest. + # Because `exit` aborts the entire commands list. 
+ - | + set +e + pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_qwen_image_vllm_omni.json + EXIT1=$$? + pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_qwen_image_edit_vllm_omni.json + EXIT2=$$? + pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_qwen_image_edit_2509_vllm_omni.json + EXIT3=$$? + if [ $$EXIT1 -eq 0 ] || [ $$EXIT2 -eq 0 ] || [ $$EXIT3 -eq 0 ]; then + buildkite-agent artifact upload "tests/dfx/perf/results/diffusion_result_*.json" + buildkite-agent artifact upload "tests/dfx/perf/results/logs/*.log" + fi + exit $$((EXIT1 | EXIT2 | EXIT3)) agents: queue: "mithril-h100-pool" plugins: @@ -150,7 +429,7 @@ steps: - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT resources: limits: - nvidia.com/gpu: 2 + nvidia.com/gpu: 4 volumeMounts: - name: devshm mountPath: /dev/shm @@ -175,23 +454,96 @@ steps: path: /mnt/hf-cache type: DirectoryOrCreate - # Dynamically appends steps from test-nightly-diffusion.yml into this build (same mechanism as - # pipeline.yml → test-ready.yml / test-merge.yml / test-nightly.yml). Foldable groups stay in the - # uploaded YAML (Other / Wan / Qwen-Image). - - label: ":card_index_dividers: Diffusion Model Test" - key: nightly-diffusion-model-test + # Diffusion x2v only (Wan, HunyuanVideo, …). x2i/x2a/x2t live in the X2I group above, not here. 
+ - group: ":card_index_dividers: Diffusion X2V Model Test" + key: nightly-diffusion-x2v-group depends_on: upload-nightly-pipeline - if: *nightly_or_pr_label - commands: - - buildkite-agent pipeline upload .buildkite/test-nightly-diffusion.yml - agents: - queue: "cpu_queue_premerge" + if: >- + build.env("NIGHTLY") == "1" || + build.pull_request.labels includes "nightly-test" || + build.pull_request.labels includes "diffusion-x2v-test" + steps: + - label: ":full_moon: Diffusion X2V · Function Test" + timeout_in_minutes: 90 + commands: + - pytest -s -v tests/e2e/online_serving/test_wan22_expansion.py tests/e2e/online_serving/test_wan_2_1_vace_expansion.py tests/e2e/online_serving/test_hunyuan_video_15_expansion.py -m "advanced_model" --run-level "advanced_model" + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 2 + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: /root/.cache/huggingface + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: DirectoryOrCreate + + - label: ":full_moon: Diffusion X2V · Accuracy Test" + timeout_in_minutes: 180 + commands: + - pytest -s -v tests/e2e/accuracy/wan22_i2v/test_wan22_i2v_video_similarity.py --run-level advanced_model + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 2 + volumeMounts: + - name: devshm + 
mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: /root/.cache/huggingface + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: DirectoryOrCreate - label: ":bar_chart: Testcase Statistics" key: nightly-testcase-statistics timeout_in_minutes: 120 depends_on: upload-nightly-pipeline - if: *nightly_or_pr_label + if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - python tools/nightly/buildkite_testcase_statistics.py -o tests/dfx/perf/results/buildkite_testcase_statistics.html - buildkite-agent artifact upload "tests/dfx/perf/results/*.html" @@ -234,15 +586,17 @@ steps: key: nightly-perf-distribution depends_on: - nightly-omni-performance - - nightly-qwen-image-performance + - nightly-tts-performance + - nightly-diffusion-x2iat-performance - nightly-testcase-statistics if: build.env("NIGHTLY") == "1" commands: - pip install openpyxl - export DEFAULT_INPUT_DIR=tests/dfx/perf/results - export DEFAULT_OUTPUT_DIR=tests/dfx/perf/results + - buildkite-agent artifact download "tests/dfx/perf/results/*.json" . --step nightly-tts-performance - buildkite-agent artifact download "tests/dfx/perf/results/*.json" . --step nightly-omni-performance - - buildkite-agent artifact download "tests/dfx/perf/results/*.json" . --step nightly-qwen-image-performance + - buildkite-agent artifact download "tests/dfx/perf/results/*.json" . --step nightly-diffusion-x2iat-performance - buildkite-agent artifact download "tests/dfx/perf/results/*.html" . 
--step nightly-testcase-statistics - python tools/nightly/generate_nightly_perf_excel.py - python tools/nightly/generate_nightly_perf_html.py diff --git a/docs/contributing/ci/CI_5levels.md b/docs/contributing/ci/CI_5levels.md index 9306035738..b0428ddd7d 100644 --- a/docs/contributing/ci/CI_5levels.md +++ b/docs/contributing/ci/CI_5levels.md @@ -86,7 +86,8 @@ Through five levels (L1-L5) and common (Common) specifications, the system clari /tests/e2e/online_serving/test_{model_name}_expansion.py
/tests/e2e/offline_inference/test_{model_name}_expansion.py
Performance:
- /tests/dfx/perf/tests/test.json
+ /tests/dfx/perf/tests/test_qwen_omni.json (Omni), test_tts.json (TTS),
+ and /tests/dfx/perf/tests/test_{diffusion_model}_vllm_omni.json (Diffusion)
Doc Test:
tests/example/online_serving/test_{model_name}.py
tests/example/offline_inference/test_{model_name}.py @@ -530,13 +531,13 @@ L4 level testing is a comprehensive quality audit before a version release. It e ### 3.2 Testing Content and Scope - ***Full Functionality Testing***: Executes all test cases defined in `test_{model_name}_expansion.py`, covering all implemented features, positive flows, boundary conditions, and exception handling. -- ***Performance Testing***: Uses the `tests/dfx/perf/tests/test.json` configuration file to drive performance testing tools for stress, load, and endurance tests, collecting metrics like throughput, response time, and resource utilization. +- ***Performance Testing***: Uses `tests/dfx/perf/tests/test_qwen_omni.json`, `tests/dfx/perf/tests/test_tts.json`, and diffusion configs in the form `tests/dfx/perf/tests/test_*_vllm_omni.json` (passed to `run_benchmark.py` — or `run_diffusion_benchmark.py` for the diffusion configs — via `--test-config-file`) to drive performance testing tools for stress, load, and endurance tests, collecting metrics like throughput, response time, and resource utilization. - ***Documentation Testing***: Verifies whether the example code provided to users is runnable and its results match the description. ### 3.3 Test Directory and Execution Files - ***Functional Testing***: Same directories as L3. -- ***Performance Test Configuration***: `tests/dfx/perf/tests/test.json` +- ***Performance Test Configuration***: `tests/dfx/perf/tests/test_qwen_omni.json`, `tests/dfx/perf/tests/test_tts.json`, and diffusion configs `tests/dfx/perf/tests/test_*_vllm_omni.json` (e.g. 
`test_qwen_image_vllm_omni.json`) - ***Documentation Example Tests***: - - `tests/example/online_serving/test_{model_name}.py` - `tests/example/offline_inference/test_{model_name}.py` diff --git a/docs/contributing/ci/test_examples/l4_performance_tests.inc.md b/docs/contributing/ci/test_examples/l4_performance_tests.inc.md index 8093e1459f..f1f3073dc5 100644 --- a/docs/contributing/ci/test_examples/l4_performance_tests.inc.md +++ b/docs/contributing/ci/test_examples/l4_performance_tests.inc.md @@ -1,4 +1,4 @@ -When you want to add L4-level ***performance test*** cases, you can refer to the following format for case addition in tests/dfx/perf/tests/test.json: +When you want to add L4-level ***performance test*** cases, you can refer to the following format for case addition in `tests/dfx/perf/tests/test_qwen_omni.json`, `tests/dfx/perf/tests/test_tts.json`, or diffusion configs such as `tests/dfx/perf/tests/test_*_vllm_omni.json` (selected via `pytest ... run_benchmark.py --test-config-file `): ```JSON { diff --git a/docs/contributing/ci/test_guide.md b/docs/contributing/ci/test_guide.md index 425f24332c..08b2e3b4ea 100644 --- a/docs/contributing/ci/test_guide.md +++ b/docs/contributing/ci/test_guide.md @@ -45,7 +45,6 @@ Our test scripts use the pytest framework. First, please use `git clone https:// === "L3 level & L4 level" ```bash - cd tests pytest -s -v -m "advanced_model" --run-level=advanced_model ``` If you only want to run L3 test case, you can use: @@ -60,9 +59,9 @@ Our test scripts use the pytest framework. 
First, please use `git clone https:// ```bash pytest -s -v -m "core_model and distributed_cuda and L4" --run-level=core_model ``` - Note: To run performance tests, use: + Note: To run performance tests (defaults to ``test_qwen_omni.json``; use ``--test-config-file tests/dfx/perf/tests/test_tts.json`` for TTS): ```bash - pytest -s -v perf/scripts/run_benchmark.py + pytest -s -v tests/dfx/perf/scripts/run_benchmark.py ``` The latest L3 test commands for various test suites can be found in the [pipeline](https://github.com/vllm-project/vllm-omni/blob/main/.buildkite/test-merge.yml). diff --git a/tests/dfx/conftest.py b/tests/dfx/conftest.py index e54141b344..997f25e6e5 100644 --- a/tests/dfx/conftest.py +++ b/tests/dfx/conftest.py @@ -2,6 +2,8 @@ from pathlib import Path from typing import Any +import pytest + from tests.conftest import modify_stage_config @@ -95,3 +97,13 @@ def create_benchmark_indices( indices.append((test_name, idx)) return indices + + +def pytest_addoption(parser: pytest.Parser) -> None: + """Register shared CLI options for DFX benchmark suites.""" + parser.addoption( + "--test-config-file", + action="store", + default=None, + help=("Path to benchmark config JSON. 
Example: --test-config-file tests/dfx/perf/tests/test_tts.json"), + ) diff --git a/tests/dfx/perf/scripts/run_benchmark.py b/tests/dfx/perf/scripts/run_benchmark.py index 67dedcd048..d5ef1b49e7 100644 --- a/tests/dfx/perf/scripts/run_benchmark.py +++ b/tests/dfx/perf/scripts/run_benchmark.py @@ -21,10 +21,30 @@ os.environ["VLLM_TEST_CLEAN_GPU_MEMORY"] = "0" -CONFIG_FILE_PATH = str(Path(__file__).parent.parent / "tests" / "test.json") -BENCHMARK_CONFIGS = load_configs(CONFIG_FILE_PATH) -STAGE_INIT_TIMEOUT = 600 +def _get_config_file_from_argv() -> str | None: + """Read ``--test-config-file`` from ``sys.argv`` at import time so parametrization can use it.""" + import sys + + for i, arg in enumerate(sys.argv): + if arg == "--test-config-file" and i + 1 < len(sys.argv): + return sys.argv[i + 1] + if arg.startswith("--test-config-file="): + return arg.split("=", 1)[1] + return None + + +_PERF_TESTS_DIR = Path(__file__).resolve().parent.parent / "tests" +_DEFAULT_CONFIG_FILE = str(_PERF_TESTS_DIR / "test_qwen_omni.json") + +CONFIG_FILE_PATH = _get_config_file_from_argv() +if CONFIG_FILE_PATH is None: + print( + "No --test-config-file in argv, using default: tests/dfx/perf/tests/test_qwen_omni.json " + "(override with e.g. 
--test-config-file tests/dfx/perf/tests/test_tts.json)" + ) + CONFIG_FILE_PATH = _DEFAULT_CONFIG_FILE +BENCHMARK_CONFIGS = load_configs(CONFIG_FILE_PATH) STAGE_CONFIGS_DIR = Path(__file__).parent.parent / "stage_configs" test_params = create_unique_server_params(BENCHMARK_CONFIGS, STAGE_CONFIGS_DIR) @@ -44,7 +64,7 @@ def omni_server(request): print(f"Starting OmniServer with test: {test_name}, model: {model}") - server_args = ["--stage-init-timeout", str(STAGE_INIT_TIMEOUT), "--init-timeout", "900"] + server_args = ["--stage-init-timeout", "300", "--init-timeout", "900"] if stage_config_path: server_args = ["--stage-configs-path", stage_config_path] + server_args with OmniServer(model, server_args) as server: @@ -97,8 +117,6 @@ def run_benchmark( ["vllm", "bench", "serve", "--omni"] + args + [ - "--num-warmups", - "2", "--save-result", "--result-dir", os.environ.get("BENCHMARK_DIR", "tests"), @@ -141,7 +159,6 @@ def run_benchmark( result["random_output_len"] = random_output_len with open(result_path, "w", encoding="utf-8") as f: json.dump(result, f, ensure_ascii=False, indent=2) - return result @@ -207,10 +224,6 @@ def _resolve_baseline_value( f"or request_rate={request_rate!r}; keys={list(baseline_raw.keys())!r}" ) if isinstance(baseline_raw, (list, tuple)): - if sweep_index is None: - raise ValueError("list baseline requires sweep_index") - if not (0 <= sweep_index < len(baseline_raw)): - raise IndexError(f"baseline list len={len(baseline_raw)} has no index {sweep_index}") return baseline_raw[sweep_index] return baseline_raw @@ -245,14 +258,14 @@ def assert_result( ) -> None: assert result["completed"] == num_prompt, "Request failures exist" baseline_data = params.get("baseline", {}) - thresholds = _baseline_thresholds_for_step( - baseline_data, - sweep_index=sweep_index, - max_concurrency=max_concurrency, - request_rate=request_rate, - ) - for metric_name, baseline_value in thresholds.items(): + for metric_name, baseline_raw in baseline_data.items(): 
current_value = result[metric_name] + baseline_value = _resolve_baseline_value( + baseline_raw, + sweep_index=sweep_index, + max_concurrency=max_concurrency, + request_rate=request_rate, + ) if "throughput" in metric_name: if current_value <= baseline_value: print( diff --git a/tests/dfx/perf/scripts/run_diffusion_benchmark.py b/tests/dfx/perf/scripts/run_diffusion_benchmark.py index 123f21405e..8eeeec8df2 100644 --- a/tests/dfx/perf/scripts/run_diffusion_benchmark.py +++ b/tests/dfx/perf/scripts/run_diffusion_benchmark.py @@ -5,8 +5,8 @@ - vllm-omni (default): starts DiffusionServer via vllm_omni.entrypoints.cli.main, benchmarks with diffusion_benchmark_serving.py --backend vllm-omni -A config JSON file is REQUIRED via --config-file: - pytest run_diffusion_benchmark.py --config-file tests/dfx/perf/tests/test_qwen_image_vllm_omni.json +A config JSON file is REQUIRED via --test-config-file: + pytest run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_qwen_image_vllm_omni.json JSON config entries use a "server_type" field, and this runner executes the vllm-omni path. @@ -55,16 +55,16 @@ def _get_config_file_from_argv() -> str | None: - """Read --config-file from sys.argv at import time so pytest parametrize can use it. + """Read --test-config-file from sys.argv at import time so pytest parametrize can use it. pytest_addoption (below) registers the same flag so pytest does not reject it. - Supports both ``--config-file path`` and ``--config-file=path`` forms. + Supports both ``--test-config-file path`` and ``--test-config-file=path`` forms. Returns None if the flag is not present; callers must handle the missing case. 
""" for i, arg in enumerate(sys.argv): - if arg == "--config-file" and i + 1 < len(sys.argv): + if arg == "--test-config-file" and i + 1 < len(sys.argv): return sys.argv[i + 1] - if arg.startswith("--config-file="): + if arg.startswith("--test-config-file="): return arg.split("=", 1)[1] return None @@ -133,19 +133,6 @@ def _append_to_aggregated_file(record: dict[str, Any]) -> None: json.dump(records, f, indent=2, ensure_ascii=False) -# Register --config-file with pytest so it does not reject the argument. -def pytest_addoption(parser: pytest.Parser) -> None: - parser.addoption( - "--config-file", - action="store", - default=None, - help=( - "Path to the benchmark config JSON file (required). " - "Example: --config-file tests/dfx/perf/tests/test_qwen_image_vllm_omni.json" - ), - ) - - _server_lock = threading.Lock() # --------------------------------------------------------------------------- diff --git a/tests/dfx/perf/tests/test.json b/tests/dfx/perf/tests/test_qwen_omni.json similarity index 92% rename from tests/dfx/perf/tests/test.json rename to tests/dfx/perf/tests/test_qwen_omni.json index 159e27a064..4662f8c0c7 100644 --- a/tests/dfx/perf/tests/test.json +++ b/tests/dfx/perf/tests/test_qwen_omni.json @@ -329,37 +329,5 @@ } } ] - }, - { - "test_name": "test_qwen3_tts", - "server_params": { - "model": "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice" - }, - "benchmark_params": [ - { - "dataset_name": "random", - "backend": "openai-audio-speech", - "endpoint": "/v1/audio/speech", - "num_prompts": [ - 10, - 40 - ], - "max_concurrency": [ - 1, - 4 - ], - "random_input_len": 100, - "random_output_len": 100, - "extra_body": { - "voice": "Vivian", - "language": "English" - }, - "percentile-metrics": "ttft,e2el,audio_rtf,audio_ttfp,audio_duration", - "baseline": { - "mean_audio_ttfp_ms": [6000, 6000], - "mean_audio_rtf": [0.3, 0.3] - } - } - ] } ] diff --git a/tests/dfx/perf/tests/test_tts.json b/tests/dfx/perf/tests/test_tts.json new file mode 100644 index 
0000000000..3583b45b4f --- /dev/null +++ b/tests/dfx/perf/tests/test_tts.json @@ -0,0 +1,34 @@ +[ + { + "test_name": "test_qwen3_tts", + "server_params": { + "model": "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice" + }, + "benchmark_params": [ + { + "dataset_name": "random", + "backend": "openai-audio-speech", + "endpoint": "/v1/audio/speech", + "num_prompts": [ + 10, + 40 + ], + "max_concurrency": [ + 1, + 4 + ], + "random_input_len": 100, + "random_output_len": 100, + "extra_body": { + "voice": "Vivian", + "language": "English" + }, + "percentile-metrics": "ttft,e2el,audio_rtf,audio_ttfp,audio_duration", + "baseline": { + "mean_audio_ttfp_ms": [6000, 6000], + "mean_audio_rtf": [0.3, 0.3] + } + } + ] + } +]