Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
a698d51
sync with cuda test and bugfix timeout
tjtanaa Mar 30, 2026
f53c024
qwen3 tts should use only 1 GPU
tjtanaa Mar 30, 2026
abb2be2
following CUDA, disable qwen3 omni test in ready pipeline
tjtanaa Mar 31, 2026
4c7548c
Merge remote-tracking branch 'origin/main' into fix-ci20260330
tjtanaa Mar 31, 2026
fd9ed72
increase qwen3 tts unit test timeout
tjtanaa Mar 31, 2026
c5872bd
try to trigger test-amd-merge.yaml
tjtanaa Mar 31, 2026
22e2483
increase qwen3 tts timeout
tjtanaa Mar 31, 2026
ee4d0cd
remove Qwen3-TTS Base E2E Test (1/2) for further investigation
tjtanaa Mar 31, 2026
f050fb9
Merge branch 'main' into fix-ci20260330
tjtanaa Mar 31, 2026
1bd718e
Merge remote-tracking branch 'origin/main' into fix-ci20260330
tjtanaa Apr 8, 2026
06d4b0c
move some of the test to mi250
tjtanaa Apr 8, 2026
0f144d3
move some of the test to mi250
tjtanaa Apr 8, 2026
27fe15b
return simple unit test to mi325 to speed up the test
tjtanaa Apr 8, 2026
ea1586f
update the merge.yml to use some mi250 resources
tjtanaa Apr 8, 2026
83e0c3e
prepare trial run amd merge
tjtanaa Apr 8, 2026
a424246
fix dockerfile.rocm test status issue
tjtanaa Apr 8, 2026
4597574
Merge remote-tracking branch 'origin/main' into fix-ci20260330
tjtanaa Apr 10, 2026
f86b089
fix ROCm AR attention backend to pick TRITON_ATTN
tjtanaa Apr 10, 2026
c6b4e94
remove comments
tjtanaa Apr 10, 2026
aaa2f25
change back to mi325, it is unstable
tjtanaa Apr 10, 2026
f8115b8
disable bagel test
tjtanaa Apr 10, 2026
7dff7bf
re-enable ready test
tjtanaa Apr 10, 2026
6115fdc
fix test
tjtanaa Apr 10, 2026
462bbd8
prepare to run on merge
tjtanaa Apr 10, 2026
00b6a5f
enable cosy test
tjtanaa Apr 10, 2026
62f6a14
let's not use mi250
tjtanaa Apr 10, 2026
34ea773
now go and test ready
tjtanaa Apr 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 61 additions & 55 deletions .buildkite/test-amd-merge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ steps:
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export GPU_ARCHS=gfx942
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- |
Expand Down Expand Up @@ -63,20 +62,20 @@ steps:
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export GPU_ARCHS=gfx942
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- timeout 15m pytest -s -v -m "core_model and cache and diffusion and not distributed_cuda and L4"

- label: "Diffusion Sequence Parallelism Test"
agent_pool: mi325_2
- label: "Diffusion Sequence Parallelism Test (Need 4 GPUs)"
agent_pool: mi325_4
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- timeout 20m pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py
- timeout 20m pytest -s -v tests/diffusion/distributed/test_ulysses_uaa_perf.py

# merge-only tests
- label: "Diffusion Tensor Parallelism Test"
Expand All @@ -95,22 +94,14 @@ steps:
commands:
- timeout 20m pytest -s -v tests/diffusion/test_diffusion_worker.py

- label: "Benchmark & Engine Test"
agent_pool: mi325_2
- label: "Engine Test"
agent_pool: mi325_1
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- |
timeout 20m bash -c '
set +e
pytest -s -v tests/benchmarks/test_serve_cli.py
EXIT1=\$?
pytest -s -v tests/engine/test_async_omni_engine_abort.py
EXIT2=\$?
exit \$((EXIT1 | EXIT2))
'
- timeout 20m pytest -s -v tests/engine/test_async_omni_engine_abort.py

- label: "Omni Model Test Qwen2-5-Omni"
agent_pool: mi325_2
Expand All @@ -121,6 +112,7 @@ steps:
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- timeout 20m pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py
- timeout 20m pytest -s -v tests/e2e/online_serving/test_qwen2_5_omni.py -m "advanced_model" --run-level "advanced_model"

- label: "Omni Model Test Qwen3-Omni"
agent_pool: mi325_2
Expand All @@ -131,11 +123,10 @@ steps:
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export VLLM_TEST_CLEAN_GPU_MEMORY=1
- timeout 10m pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py
- timeout 20m pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py -m "advanced_model" --run-level "advanced_model"
- timeout 30m pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py tests/e2e/online_serving/test_qwen3_omni.py tests/e2e/online_serving/test_mimo_audio.py -m "advanced_model" --run-level "advanced_model"

- label: "Qwen3-TTS CustomVoice E2E Test"
agent_pool: mi325_2
agent_pool: mi325_1
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
Expand All @@ -145,21 +136,21 @@ steps:
export VLLM_LOGGING_LEVEL=DEBUG
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1"
pytest -s -v tests/e2e/online_serving/test_qwen3_tts_customvoice.py -m "advanced_model" --run-level "advanced_model" && pytest -s -v tests/e2e/offline_inference/test_qwen3_tts_customvoice.py
pytest -s -v tests/e2e/online_serving/test_qwen3_tts_customvoice.py tests/e2e/offline_inference/test_qwen3_tts_customvoice.py -m "advanced_model" --run-level "advanced_model"
'

- label: "Qwen3-TTS Base E2E Test"
agent_pool: mi325_2
agent_pool: mi325_1
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- |
timeout 20m bash -c '
timeout 30m bash -c '
export VLLM_LOGGING_LEVEL=DEBUG
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1"
pytest -s -v tests/e2e/online_serving/test_qwen3_tts_base.py -m "advanced_model" --run-level "advanced_model" && pytest -s -v tests/e2e/offline_inference/test_qwen3_tts_base.py
pytest -s -v tests/e2e/online_serving/test_qwen3_tts_base.py tests/e2e/offline_inference/test_qwen3_tts_base.py -m "advanced_model" --run-level "advanced_model"
'

- label: "Diffusion Image Edit Test"
Expand All @@ -173,43 +164,58 @@ steps:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- timeout 20m pytest -s -v tests/e2e/online_serving/test_image_gen_edit.py

# split Bagel Model Test with H100 (Real Weights) into three tests
- label: "Bagel Text2Img Model Test"
agent_pool: mi325_1
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export GPU_ARCHS=gfx942
- export VLLM_TEST_CLEAN_GPU_MEMORY=1
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export VLLM_ROCM_USE_AITER_RMSNORM=0
- timeout 30m pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py -m "advanced_model" --run-level "advanced_model" -k "shared_memory" -k "rocm"
# TODO: Bagel test on ROCm is very unstable. @tjtanaa
# Need to debug before re-enabling; numerical behavior changes across large PRs
# # split Bagel Model Test with H100 (Real Weights) into three tests
# - label: "Bagel Text2Img Model Test (1/3)"
# agent_pool: mi325_1
# depends_on: amd-build
# mirror_hardwares: [amdproduction]
# grade: Blocking
# commands:
# - export GPU_ARCHS=gfx942
# - export VLLM_TEST_CLEAN_GPU_MEMORY=1
# - export VLLM_LOGGING_LEVEL=DEBUG
# - export VLLM_WORKER_MULTIPROC_METHOD=spawn
# - export VLLM_ROCM_USE_AITER_RMSNORM=0
# - timeout 30m pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py -m "advanced_model" --run-level "advanced_model" -k "shared_memory" -k "rocm"

- label: "Bagel Img2Img Model Test"
agent_pool: mi325_1
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export GPU_ARCHS=gfx942
- export VLLM_TEST_CLEAN_GPU_MEMORY=1
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export VLLM_ROCM_USE_AITER_RMSNORM=0
- timeout 30m pytest -s -v tests/e2e/offline_inference/test_bagel_img2img.py -m "advanced_model" --run-level "advanced_model" -k "rocm"
# - label: "Bagel Img2Img Model Test (2/3)"
# agent_pool: mi325_1
# depends_on: amd-build
# mirror_hardwares: [amdproduction]
# grade: Blocking
# commands:
# - export GPU_ARCHS=gfx942
# - export VLLM_TEST_CLEAN_GPU_MEMORY=1
# - export VLLM_LOGGING_LEVEL=DEBUG
# - export VLLM_WORKER_MULTIPROC_METHOD=spawn
# - export VLLM_ROCM_USE_AITER_RMSNORM=0
# - timeout 30m pytest -s -v tests/e2e/offline_inference/test_bagel_img2img.py -m "advanced_model" --run-level "advanced_model" -k "rocm"

# - label: "Bagel Online Serving Test (3/3)"
# agent_pool: mi325_1
# depends_on: amd-build
# mirror_hardwares: [amdproduction]
# grade: Blocking
# commands:
# - export GPU_ARCHS=gfx942
# - export VLLM_TEST_CLEAN_GPU_MEMORY=1
# - export VLLM_IMAGE_FETCH_TIMEOUT=60
# - export VLLM_LOGGING_LEVEL=DEBUG
# - export VLLM_WORKER_MULTIPROC_METHOD=spawn
# - export VLLM_ROCM_USE_AITER_RMSNORM=0
# - timeout 40m pytest -s -v tests/e2e/online_serving/test_bagel_online.py -m "advanced_model" --run-level "advanced_model" -k "rocm"

- label: "Bagel Online Serving Test"
- label: "Voxtral-TTS E2E Test"
agent_pool: mi325_1
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export GPU_ARCHS=gfx942
- export VLLM_TEST_CLEAN_GPU_MEMORY=1
- export VLLM_IMAGE_FETCH_TIMEOUT=60
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export VLLM_ROCM_USE_AITER_RMSNORM=0
- timeout 40m pytest -s -v tests/e2e/online_serving/test_bagel_online.py -m "advanced_model" --run-level "advanced_model" -k "rocm"
- |
timeout 20m bash -c '
export VLLM_LOGGING_LEVEL=DEBUG
export VLLM_WORKER_MULTIPROC_METHOD=spawn
pytest -s -v tests/e2e/online_serving/test_voxtral_tts.py tests/e2e/offline_inference/test_voxtral_tts.py -m "advanced_model" --run-level "advanced_model"
'
Loading
Loading