From 949a272f3d4cd93ecdc0652eb805a87709c65adf Mon Sep 17 00:00:00 2001 From: dengyunyang <584797741@qq.com> Date: Sat, 30 May 2026 09:49:59 +0800 Subject: [PATCH] perf test adapt to new deploy config Signed-off-by: dengyunyang <584797741@qq.com> --- .buildkite/test-nightly.yml | 54 ------------------- ...json => test_hunyuan_image_tp2_cfgp2.json} | 20 ++++--- ...2.json => test_hunyuan_image_tp2_sp2.json} | 22 ++++---- ...4_fp8.json => test_hunyuan_image_tp4.json} | 20 ++++--- 4 files changed, 28 insertions(+), 88 deletions(-) rename tests/dfx/perf/tests/{test_hunyuan_image_tp2_fp8_cfgp2.json => test_hunyuan_image_tp2_cfgp2.json} (57%) rename tests/dfx/perf/tests/{test_hunyuan_image_tp2_fp8_sp2.json => test_hunyuan_image_tp2_sp2.json} (54%) rename tests/dfx/perf/tests/{test_hunyuan_image_tp4_fp8.json => test_hunyuan_image_tp4.json} (56%) diff --git a/.buildkite/test-nightly.yml b/.buildkite/test-nightly.yml index cdd8ee8e9eb..d23080cbee1 100644 --- a/.buildkite/test-nightly.yml +++ b/.buildkite/test-nightly.yml @@ -1033,60 +1033,6 @@ steps: path: /mnt/hf-cache type: DirectoryOrCreate - - label: ":full_moon: Diffusion X2I(&A&T) · HunyuanImage3 · DiT Perf Test" - key: nightly-hunyuan-image3-performance - soft_fail: true - timeout_in_minutes: 120 - if: build.env("RUN_HUNYUAN_IMAGE3_PERF") == "1" - commands: - - export DIFFUSION_BENCHMARK_DIR=tests/dfx/perf/results - - export DIFFUSION_ATTENTION_BACKEND=FLASH_ATTN - - export CACHE_DIT_VERSION=1.3.0 - - | - set +e - pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_hunyuan_image_tp4_fp8.json - EXIT1=$$? - pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_sp2.json - EXIT2=$$? - pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_cfgp2.json - EXIT3=$$? 
- buildkite-agent artifact upload "tests/dfx/perf/results/diffusion_result_*.json" - buildkite-agent artifact upload "tests/dfx/perf/results/logs/*.log" - exit $$((EXIT1 | EXIT2 | EXIT3)) - agents: - queue: "mithril-h100-pool" - plugins: - - kubernetes: - podSpec: - containers: - - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - resources: - limits: - nvidia.com/gpu: 4 - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: /root/.cache/huggingface - env: - - name: HF_HOME - value: /root/.cache/huggingface - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - node.kubernetes.io/instance-type: gpu-h100-sxm - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: DirectoryOrCreate - # Diffusion x2v only (Wan, HunyuanVideo, …). x2i/x2a/x2t live in the X2I group above, not here. 
- group: ":card_index_dividers: Diffusion X2V Model Test" key: nightly-diffusion-x2v-group diff --git a/tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_cfgp2.json b/tests/dfx/perf/tests/test_hunyuan_image_tp2_cfgp2.json similarity index 57% rename from tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_cfgp2.json rename to tests/dfx/perf/tests/test_hunyuan_image_tp2_cfgp2.json index e6271545f77..e6d7ce65948 100644 --- a/tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_cfgp2.json +++ b/tests/dfx/perf/tests/test_hunyuan_image_tp2_cfgp2.json @@ -1,35 +1,33 @@ [ { - "test_name": "test_hunyuan_image_tp2_fp8_cfgp2", - "description": "TP=2 Quantization=fp8 CfgP=2 baseline", + "test_name": "test_hunyuan_image_tp2_cfgp2", + "description": "TP=2 CfgP=2 baseline", "server_type": "vllm-omni", "server_params": { "model": "tencent/HunyuanImage-3.0-Instruct", "serve_args": { + "deploy-config": "../vllm_omni/deploy/hunyuan_image3_dit.yaml", "tensor-parallel-size": 2, "cfg-parallel-size": 2, - "quantization": "fp8", - "distributed-executor-backend": "mp", - "enforce-eager": true, "enable-diffusion-pipeline-profiler": true } }, "benchmark_params": [ { - "name": "1024x1024_steps50", + "name": "1024x1024_steps8", "dataset": "random", "task": "t2i", "width": 1024, "height": 1024, - "num-inference-steps": 50, + "num-inference-steps": 8, "num-prompts": 10, "max-concurrency": 1, "skip-performance-assertion": true, "baseline": { - "throughput_qps": 0.1035, - "latency_p99": 9.9057, - "peak_memory_mb_max": 66470, - "peak_memory_mb_mean": 66470 + "throughput_qps": 0.21, + "latency_p99": 4.7469, + "peak_memory_mb_max": 101100, + "peak_memory_mb_mean": 101100 } } ] diff --git a/tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_sp2.json b/tests/dfx/perf/tests/test_hunyuan_image_tp2_sp2.json similarity index 54% rename from tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_sp2.json rename to tests/dfx/perf/tests/test_hunyuan_image_tp2_sp2.json index 0112fc51a0e..fbbe1643e71 100644 --- 
a/tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_sp2.json +++ b/tests/dfx/perf/tests/test_hunyuan_image_tp2_sp2.json @@ -1,35 +1,33 @@ [ { - "test_name": "test_hunyuan_image_tp2_fp8_sp2", - "description": "TP=2 Quantization=fp8 SP=2 baseline", + "test_name": "test_hunyuan_image_tp2_sp2", + "description": "TP=2 SP=2 baseline", "server_type": "vllm-omni", "server_params": { "model": "tencent/HunyuanImage-3.0-Instruct", "serve_args": { + "deploy-config": "../vllm_omni/deploy/hunyuan_image3_dit.yaml", "tensor-parallel-size": 2, - "usp": 2, - "quantization": "fp8", - "distributed-executor-backend": "mp", - "enforce-eager": true, + "ulysses-degree": 2, "enable-diffusion-pipeline-profiler": true } }, "benchmark_params": [ { - "name": "1024x1024_steps50", + "name": "1024x1024_steps8", "dataset": "random", "task": "t2i", "width": 1024, "height": 1024, - "num-inference-steps": 50, + "num-inference-steps": 8, "num-prompts": 10, "max-concurrency": 1, "skip-performance-assertion": true, "baseline": { - "throughput_qps": 0.08, - "latency_p99": 12.0731, - "peak_memory_mb_max": 66314, - "peak_memory_mb_mean": 66314 + "throughput_qps": 0.20, + "latency_p99": 5.1025, + "peak_memory_mb_max": 97402, + "peak_memory_mb_mean": 97402 } } ] diff --git a/tests/dfx/perf/tests/test_hunyuan_image_tp4_fp8.json b/tests/dfx/perf/tests/test_hunyuan_image_tp4.json similarity index 56% rename from tests/dfx/perf/tests/test_hunyuan_image_tp4_fp8.json rename to tests/dfx/perf/tests/test_hunyuan_image_tp4.json index 8f6bad2f84f..8f4c0ec5000 100644 --- a/tests/dfx/perf/tests/test_hunyuan_image_tp4_fp8.json +++ b/tests/dfx/perf/tests/test_hunyuan_image_tp4.json @@ -1,34 +1,32 @@ [ { - "test_name": "test_hunyuan_image_tp4_fp8", - "description": "TP=4 Quantization=fp8 baseline", + "test_name": "test_hunyuan_image_tp4", + "description": "TP=4 baseline", "server_type": "vllm-omni", "server_params": { "model": "tencent/HunyuanImage-3.0-Instruct", "serve_args": { + "deploy-config": 
"../vllm_omni/deploy/hunyuan_image3_dit.yaml", "tensor-parallel-size": 4, - "quantization": "fp8", - "distributed-executor-backend": "mp", - "enforce-eager": true, "enable-diffusion-pipeline-profiler": true } }, "benchmark_params": [ { - "name": "1024x1024_steps50", + "name": "1024x1024_steps8", "dataset": "random", "task": "t2i", "width": 1024, "height": 1024, - "num-inference-steps": 50, + "num-inference-steps": 8, "num-prompts": 10, "max-concurrency": 1, "skip-performance-assertion": true, "baseline": { - "throughput_qps": 0.08, - "latency_p99": 13.1227, - "peak_memory_mb_max": 46838, - "peak_memory_mb_mean": 46838 + "throughput_qps": 0.21, + "latency_p99": 4.9235, + "peak_memory_mb_max": 57576, + "peak_memory_mb_mean": 57576 } } ]