Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 0 additions & 54 deletions .buildkite/test-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1033,60 +1033,6 @@ steps:
path: /mnt/hf-cache
type: DirectoryOrCreate

- label: ":full_moon: Diffusion X2I(&A&T) · HunyuanImage3 · DiT Perf Test"
key: nightly-hunyuan-image3-performance
soft_fail: true
timeout_in_minutes: 120
if: build.env("RUN_HUNYUAN_IMAGE3_PERF") == "1"
commands:
- export DIFFUSION_BENCHMARK_DIR=tests/dfx/perf/results
- export DIFFUSION_ATTENTION_BACKEND=FLASH_ATTN
- export CACHE_DIT_VERSION=1.3.0
- |
set +e
pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_hunyuan_image_tp4_fp8.json
EXIT1=$$?
pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_sp2.json
EXIT2=$$?
pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --test-config-file tests/dfx/perf/tests/test_hunyuan_image_tp2_fp8_cfgp2.json
EXIT3=$$?
buildkite-agent artifact upload "tests/dfx/perf/results/diffusion_result_*.json"
buildkite-agent artifact upload "tests/dfx/perf/results/logs/*.log"
exit $$((EXIT1 | EXIT2 | EXIT3))
agents:
queue: "mithril-h100-pool"
plugins:
- kubernetes:
podSpec:
containers:
- image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
resources:
limits:
nvidia.com/gpu: 4
volumeMounts:
- name: devshm
mountPath: /dev/shm
- name: hf-cache
mountPath: /root/.cache/huggingface
env:
- name: HF_HOME
value: /root/.cache/huggingface
- name: HF_TOKEN
valueFrom:
secretKeyRef:
name: hf-token-secret
key: token
nodeSelector:
node.kubernetes.io/instance-type: gpu-h100-sxm
volumes:
- name: devshm
emptyDir:
medium: Memory
- name: hf-cache
hostPath:
path: /mnt/hf-cache
type: DirectoryOrCreate

# Diffusion x2v only (Wan, HunyuanVideo, …). x2i/x2a/x2t live in the X2I group above, not here.
- group: ":card_index_dividers: Diffusion X2V Model Test"
key: nightly-diffusion-x2v-group
Expand Down
Original file line number Diff line number Diff line change
@@ -1,35 +1,33 @@
[
{
"test_name": "test_hunyuan_image_tp2_fp8_cfgp2",
"description": "TP=2 Quantization=fp8 CfgP=2 baseline",
"test_name": "test_hunyuan_image_tp2_cfgp2",
"description": "TP=2 CfgP=2 baseline",
"server_type": "vllm-omni",
"server_params": {
"model": "tencent/HunyuanImage-3.0-Instruct",
"serve_args": {
"deploy_config": "../vllm_omni/deploy/hunyuan_image3_dit.yaml",
"tensor-parallel-size": 2,
"cfg-parallel-size": 2,
"quantization": "fp8",
"distributed-executor-backend": "mp",
"enforce-eager": true,
"enable-diffusion-pipeline-profiler": true
}
},
"benchmark_params": [
{
"name": "1024x1024_steps50",
"name": "1024x1024_steps8",
"dataset": "random",
"task": "t2i",
"width": 1024,
"height": 1024,
"num-inference-steps": 50,
"num-inference-steps": 8,
"num-prompts": 10,
"max-concurrency": 1,
"skip-performance-assertion": true,
"baseline": {
"throughput_qps": 0.1035,
"latency_p99": 9.9057,
"peak_memory_mb_max": 66470,
"peak_memory_mb_mean": 66470
"throughput_qps": 0.21,
"latency_p99": 4.7469,
"peak_memory_mb_max": 101100,
"peak_memory_mb_mean": 101100
}
}
]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,35 +1,33 @@
[
{
"test_name": "test_hunyuan_image_tp2_fp8_sp2",
"description": "TP=2 Quantization=fp8 SP=2 baseline",
"test_name": "test_hunyuan_image_tp2_sp2",
"description": "TP=2 SP=2 baseline",
"server_type": "vllm-omni",
"server_params": {
"model": "tencent/HunyuanImage-3.0-Instruct",
"serve_args": {
"deploy-config": "../vllm_omni/deploy/hunyuan_image3_dit.yaml",
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe use an absolute path here for better UX.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is equivalent to using an absolute path, because the benchmark sets the working directory to `tests` when it launches the Omni process:

cwd=str(Path(__file__).parent.parent.parent.parent),

Therefore, regardless of the directory from which pytest is executed, the relative path always resolves to the deploy config correctly.

"tensor-parallel-size": 2,
"usp": 2,
"quantization": "fp8",
"distributed-executor-backend": "mp",
"enforce-eager": true,
"ulysses-degree": 2,
"enable-diffusion-pipeline-profiler": true
}
},
"benchmark_params": [
{
"name": "1024x1024_steps50",
"name": "1024x1024_steps8",
"dataset": "random",
"task": "t2i",
"width": 1024,
"height": 1024,
"num-inference-steps": 50,
"num-inference-steps": 8,
"num-prompts": 10,
"max-concurrency": 1,
"skip-performance-assertion": true,
"baseline": {
"throughput_qps": 0.08,
"latency_p99": 12.0731,
"peak_memory_mb_max": 66314,
"peak_memory_mb_mean": 66314
"throughput_qps": 0.20,
"latency_p99": 5.1025,
"peak_memory_mb_max": 97402,
"peak_memory_mb_mean": 97402
}
}
]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,34 +1,32 @@
[
{
"test_name": "test_hunyuan_image_tp4_fp8",
"description": "TP=4 Quantization=fp8 baseline",
"test_name": "test_hunyuan_image_tp4",
"description": "TP=4 baseline",
"server_type": "vllm-omni",
"server_params": {
"model": "tencent/HunyuanImage-3.0-Instruct",
"serve_args": {
"deploy-config": "../vllm_omni/deploy/hunyuan_image3_dit.yaml",
"tensor-parallel-size": 4,
"quantization": "fp8",
"distributed-executor-backend": "mp",
"enforce-eager": true,
"enable-diffusion-pipeline-profiler": true
}
},
"benchmark_params": [
{
"name": "1024x1024_steps50",
"name": "1024x1024_steps8",
"dataset": "random",
"task": "t2i",
"width": 1024,
"height": 1024,
"num-inference-steps": 50,
"num-inference-steps": 8,
"num-prompts": 10,
"max-concurrency": 1,
"skip-performance-assertion": true,
"baseline": {
"throughput_qps": 0.08,
"latency_p99": 13.1227,
"peak_memory_mb_max": 46838,
"peak_memory_mb_mean": 46838
"throughput_qps": 0.21,
"latency_p99": 4.9235,
"peak_memory_mb_max": 57576,
"peak_memory_mb_mean": 57576
}
}
]
Expand Down
Loading