From 949a272f3d4cd93ecdc0652eb805a87709c65adf Mon Sep 17 00:00:00 2001
From: dengyunyang <584797741@qq.com>
Date: Sat, 30 May 2026 09:49:59 +0800
Subject: [PATCH] Adapt perf tests to new deploy config

Signed-off-by: dengyunyang <584797741@qq.com>
---
 .buildkite/test-nightly.yml                   | 54 -------------------
 ...json => test_hunyuan_image_tp2_cfgp2.json} | 20 ++++---
 ...2.json => test_hunyuan_image_tp2_sp2.json} | 22 ++++----
 ...4_fp8.json => test_hunyuan_image_tp4.json} | 20 ++++---
 4 files changed, 28 insertions(+), 88 deletions(-)
 rename tests/dfx/perf/tests/{test_hunyuan_image_tp2_fp8_cfgp2.json => test_hunyuan_image_tp2_cfgp2.json} (57%)
 rename tests/dfx/perf/tests/{test_hunyuan_image_tp2_fp8_sp2.json => test_hunyuan_image_tp2_sp2.json} (54%)
 rename tests/dfx/perf/tests/{test_hunyuan_image_tp4_fp8.json => test_hunyuan_image_tp4.json} (56%)

diff --git a/.buildkite/test-nightly.yml b/.buildkite/test-nightly.yml
index cdd8ee8e9eb..d23080cbee1 100644
--- a/.buildkite/test-nightly.yml
+++ b/.buildkite/test-nightly.yml
@@ -1033,60 +1033,6 @@ steps:
                       path: /mnt/hf-cache
                       type: DirectoryOrCreate
 
-      - label: ":full_moon: Diffusion X2I(&A&T) · HunyuanImage3 · DiT Perf Test"
-        key: nightly-hunyuan-image3-performance
-        soft_fail: true
-        timeout_in_minutes: 120
-        if: build.env("RUN_HUNYUAN_IMAGE3_PERF") == "1"
-        commands:
-          - export DIFFUSION_BENCHMARK_DIR=tests/dfx/perf/results
-          - export DIFFUSION_ATTENTION_BACKEND=FLASH_ATTN
-          - export CACHE_DIT_VERSION=1.3.0
-          - |
-            set +e
-            pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_hunyuan_image_tp4_fp8.json
-            EXIT1=$$?
-            pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_sp2.json
-            EXIT2=$$?
-            pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_cfgp2.json
-            EXIT3=$$?
-            buildkite-agent artifact upload "tests/dfx/perf/results/diffusion_result_*.json"
-            buildkite-agent artifact upload "tests/dfx/perf/results/logs/*.log"
-            exit $$((EXIT1 | EXIT2 | EXIT3))
-        agents:
-          queue: "mithril-h100-pool"
-        plugins:
-          - kubernetes:
-              podSpec:
-                containers:
-                  - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
-                    resources:
-                      limits:
-                        nvidia.com/gpu: 4
-                    volumeMounts:
-                      - name: devshm
-                        mountPath: /dev/shm
-                      - name: hf-cache
-                        mountPath: /root/.cache/huggingface
-                    env:
-                      - name: HF_HOME
-                        value: /root/.cache/huggingface
-                      - name: HF_TOKEN
-                        valueFrom:
-                          secretKeyRef:
-                            name: hf-token-secret
-                            key: token
-                nodeSelector:
-                  node.kubernetes.io/instance-type: gpu-h100-sxm
-                volumes:
-                  - name: devshm
-                    emptyDir:
-                      medium: Memory
-                  - name: hf-cache
-                    hostPath:
-                      path: /mnt/hf-cache
-                      type: DirectoryOrCreate
-
   # Diffusion x2v only (Wan, HunyuanVideo, …). x2i/x2a/x2t live in the X2I group above, not here.
   - group: ":card_index_dividers: Diffusion X2V Model Test"
     key: nightly-diffusion-x2v-group
diff --git a/tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_cfgp2.json b/tests/dfx/perf/tests/test_hunyuan_image_tp2_cfgp2.json
similarity index 57%
rename from tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_cfgp2.json
rename to tests/dfx/perf/tests/test_hunyuan_image_tp2_cfgp2.json
index e6271545f77..e6d7ce65948 100644
--- a/tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_cfgp2.json
+++ b/tests/dfx/perf/tests/test_hunyuan_image_tp2_cfgp2.json
@@ -1,35 +1,33 @@
 [
     {
-        "test_name": "test_hunyuan_image_tp2_fp8_cfgp2",
-        "description": "TP=2 Quantization=fp8 CfgP=2 baseline",
+        "test_name": "test_hunyuan_image_tp2_cfgp2",
+        "description": "TP=2 CfgP=2 baseline",
         "server_type": "vllm-omni",
         "server_params": {
             "model": "tencent/HunyuanImage-3.0-Instruct",
             "serve_args": {
+                "deploy-config": "../vllm_omni/deploy/hunyuan_image3_dit.yaml",
                 "tensor-parallel-size": 2,
                 "cfg-parallel-size": 2,
-                "quantization": "fp8",
-                "distributed-executor-backend": "mp",
-                "enforce-eager": true,
                 "enable-diffusion-pipeline-profiler": true
             }
         },
         "benchmark_params": [
             {
-                "name": "1024x1024_steps50",
+                "name": "1024x1024_steps8",
                 "dataset": "random",
                 "task": "t2i",
                 "width": 1024,
                 "height": 1024,
-                "num-inference-steps": 50,
+                "num-inference-steps": 8,
                 "num-prompts": 10,
                 "max-concurrency": 1,
                 "skip-performance-assertion": true,
                 "baseline": {
-                    "throughput_qps": 0.1035,
-                    "latency_p99": 9.9057,
-                    "peak_memory_mb_max": 66470,
-                    "peak_memory_mb_mean": 66470
+                    "throughput_qps": 0.21,
+                    "latency_p99": 4.7469,
+                    "peak_memory_mb_max": 101100,
+                    "peak_memory_mb_mean": 101100
                 }
             }
         ]
diff --git a/tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_sp2.json b/tests/dfx/perf/tests/test_hunyuan_image_tp2_sp2.json
similarity index 54%
rename from tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_sp2.json
rename to tests/dfx/perf/tests/test_hunyuan_image_tp2_sp2.json
index 0112fc51a0e..fbbe1643e71 100644
--- a/tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_sp2.json
+++ b/tests/dfx/perf/tests/test_hunyuan_image_tp2_sp2.json
@@ -1,35 +1,33 @@
 [
     {
-        "test_name": "test_hunyuan_image_tp2_fp8_sp2",
-        "description": "TP=2 Quantization=fp8 SP=2 baseline",
+        "test_name": "test_hunyuan_image_tp2_sp2",
+        "description": "TP=2 SP=2 baseline",
         "server_type": "vllm-omni",
         "server_params": {
             "model": "tencent/HunyuanImage-3.0-Instruct",
             "serve_args": {
+                "deploy-config": "../vllm_omni/deploy/hunyuan_image3_dit.yaml",
                 "tensor-parallel-size": 2,
-                "usp": 2,
-                "quantization": "fp8",
-                "distributed-executor-backend": "mp",
-                "enforce-eager": true,
+                "ulysses-degree": 2,
                 "enable-diffusion-pipeline-profiler": true
             }
         },
         "benchmark_params": [
             {
-                "name": "1024x1024_steps50",
+                "name": "1024x1024_steps8",
                 "dataset": "random",
                 "task": "t2i",
                 "width": 1024,
                 "height": 1024,
-                "num-inference-steps": 50,
+                "num-inference-steps": 8,
                 "num-prompts": 10,
                 "max-concurrency": 1,
                 "skip-performance-assertion": true,
                 "baseline": {
-                    "throughput_qps": 0.08,
-                    "latency_p99": 12.0731,
-                    "peak_memory_mb_max": 66314,
-                    "peak_memory_mb_mean": 66314
+                    "throughput_qps": 0.20,
+                    "latency_p99": 5.1025,
+                    "peak_memory_mb_max": 97402,
+                    "peak_memory_mb_mean": 97402
                 }
             }
         ]
diff --git a/tests/dfx/perf/tests/test_hunyuan_image_tp4_fp8.json b/tests/dfx/perf/tests/test_hunyuan_image_tp4.json
similarity index 56%
rename from tests/dfx/perf/tests/test_hunyuan_image_tp4_fp8.json
rename to tests/dfx/perf/tests/test_hunyuan_image_tp4.json
index 8f6bad2f84f..8f4c0ec5000 100644
--- a/tests/dfx/perf/tests/test_hunyuan_image_tp4_fp8.json
+++ b/tests/dfx/perf/tests/test_hunyuan_image_tp4.json
@@ -1,34 +1,32 @@
 [
     {
-        "test_name": "test_hunyuan_image_tp4_fp8",
-        "description": "TP=4 Quantization=fp8 baseline",
+        "test_name": "test_hunyuan_image_tp4",
+        "description": "TP=4 baseline",
         "server_type": "vllm-omni",
         "server_params": {
             "model": "tencent/HunyuanImage-3.0-Instruct",
             "serve_args": {
+                "deploy-config": "../vllm_omni/deploy/hunyuan_image3_dit.yaml",
                 "tensor-parallel-size": 4,
-                "quantization": "fp8",
-                "distributed-executor-backend": "mp",
-                "enforce-eager": true,
                 "enable-diffusion-pipeline-profiler": true
             }
         },
         "benchmark_params": [
             {
-                "name": "1024x1024_steps50",
+                "name": "1024x1024_steps8",
                 "dataset": "random",
                 "task": "t2i",
                 "width": 1024,
                 "height": 1024,
-                "num-inference-steps": 50,
+                "num-inference-steps": 8,
                 "num-prompts": 10,
                 "max-concurrency": 1,
                 "skip-performance-assertion": true,
                 "baseline": {
-                    "throughput_qps": 0.08,
-                    "latency_p99": 13.1227,
-                    "peak_memory_mb_max": 46838,
-                    "peak_memory_mb_mean": 46838
+                    "throughput_qps": 0.21,
+                    "latency_p99": 4.9235,
+                    "peak_memory_mb_max": 57576,
+                    "peak_memory_mb_mean": 57576
                 }
             }
         ]