Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
4394296
setup test amd ready
tjtanaa Mar 3, 2026
c3d327f
fix syntax
tjtanaa Mar 3, 2026
0244098
fix the commands
tjtanaa Mar 3, 2026
ea6de96
revert jinja; clean up test-amd-ready.yaml
tjtanaa Mar 3, 2026
9e82c04
try the command
tjtanaa Mar 4, 2026
167eb53
fix the jinja issue
tjtanaa Mar 4, 2026
f3cc84b
fix the multiline issue
tjtanaa Mar 4, 2026
6facd38
resolve jinja issue
tjtanaa Mar 4, 2026
2ce1ef6
fix the jinja bash command parsing issue
tjtanaa Mar 5, 2026
4a0afa6
try to resolve the bootstrapped command syntax error
tjtanaa Mar 16, 2026
593e33f
fix EXIT syntax
tjtanaa Mar 16, 2026
a4aaf3b
Merge remote-tracking branch 'origin/main' into setupreadymergeci
tjtanaa Mar 16, 2026
69dbebb
Merge remote-tracking branch 'origin/main' into setupreadymergeci
tjtanaa Mar 16, 2026
b71daa0
disable AITER as it is not shipped prebuilt; fix bagel tests
tjtanaa Mar 16, 2026
abda29c
disable stable audio model ut; fix test_serve_cli test and qwen25omni…
tjtanaa Mar 17, 2026
25b1c16
disable aiter, and change diffusion gpu worker test to mi250
tjtanaa Mar 17, 2026
6fbf447
move some tests to mi250
tjtanaa Mar 17, 2026
53765f3
add support to test-amd-merge
tjtanaa Mar 17, 2026
df285a9
increase timeout and add more jobs to mi250 queue
tjtanaa Mar 17, 2026
8d7c517
point all tests back to mi325 machine
tjtanaa Mar 18, 2026
6dec429
test merge yaml
tjtanaa Mar 19, 2026
916c4b9
Merge remote-tracking branch 'origin/main' into setupreadymergeci
tjtanaa Mar 19, 2026
f478363
fix test qwen3 omni audio test
tjtanaa Mar 19, 2026
88308b3
evaluate test-ready.yml after sync main
tjtanaa Mar 19, 2026
b3bcaf7
complete the pr
tjtanaa Mar 19, 2026
8801f26
sync with main
tjtanaa Mar 19, 2026
a040792
update bagel img2img expectation
tjtanaa Mar 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .buildkite/bootstrap-amd-omni.sh
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,18 @@ upload_pipeline() {
FAIL_FAST=$(fail_fast)

cd .buildkite

# Select test definition file: merge suite for main, ready suite for PRs
if [[ $BUILDKITE_BRANCH == "main" ]]; then
TEST_YAML="test-amd-merge.yml"
else
TEST_YAML="test-amd-ready.yaml"
fi

(
set -x
# Output pipeline.yaml with all blank lines removed
minijinja-cli test-template.j2 test-amd.yaml \
minijinja-cli test-template.j2 "$TEST_YAML" \
-D branch="$BUILDKITE_BRANCH" \
-D list_file_diff="$LIST_FILE_DIFF" \
-D run_all="$RUN_ALL" \
Expand Down
11 changes: 8 additions & 3 deletions .buildkite/scripts/hardware_ci/run-amd-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,11 @@ HF_CACHE="$(realpath ~)/huggingface"
mkdir -p "${HF_CACHE}"
HF_MOUNT="/root/.cache/huggingface"

commands=$@
# Pick the command string to run: the TEST_COMMAND environment variable wins
# when it is set and non-empty; otherwise use the script's own arguments.
if [[ -n "${TEST_COMMAND:-}" ]]; then
  commands="$TEST_COMMAND"
else
  # "$*" joins the positional parameters into a single string explicitly.
  # Assigning "$@" to a scalar relies on implicit joining (ShellCheck SC2124).
  commands="$*"
fi
echo "Commands:$commands"

PARALLEL_JOB_COUNT=8
Expand All @@ -102,6 +106,7 @@ if [[ -z "$render_gid" ]]; then
fi

# If the command contains a shard flag, run all shards in parallel because the host has 8 GPUs.
# TODO(@tjtanaa): re-enable VLLM_ROCM_USE_AITER=1 once AITER ships with prebuilt kernels.
if [[ $commands == *"--shard-id="* ]]; then
# assign job count as the number of shards used
commands=$(echo "$commands" | sed -E "s/--num-shards[[:blank:]]*=[[:blank:]]*[0-9]*/--num-shards=${PARALLEL_JOB_COUNT} /g" | sed 's/ \\ / /g')
Expand All @@ -118,7 +123,7 @@ if [[ $commands == *"--shard-id="* ]]; then
--rm \
-e MIOPEN_DEBUG_CONV_DIRECT=0 \
-e MIOPEN_DEBUG_CONV_GEMM=0 \
-e VLLM_ROCM_USE_AITER=1 \
-e VLLM_ROCM_USE_AITER=0 \
-e HIP_VISIBLE_DEVICES="${GPU}" \
-e HF_TOKEN \
-e AWS_ACCESS_KEY_ID \
Expand Down Expand Up @@ -153,7 +158,7 @@ else
--rm \
-e MIOPEN_DEBUG_CONV_DIRECT=0 \
-e MIOPEN_DEBUG_CONV_GEMM=0 \
-e VLLM_ROCM_USE_AITER=1 \
-e VLLM_ROCM_USE_AITER=0 \
-e HF_TOKEN \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
Expand Down
197 changes: 197 additions & 0 deletions .buildkite/test-amd-merge.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
steps:

- label: "Simple Unit Test"
agent_pool: mi325_1
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export VLLM_ROCM_USE_AITER=0
- "timeout 20m pytest -v -s -m 'core_model and cpu' --cov=vllm_omni --cov-branch --cov-report=term-missing --cov-report=html --cov-report=xml"

- label: "Diffusion Model Test"
agent_pool: mi325_2
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export GPU_ARCHS=gfx942
- timeout 20m pytest -s -v tests/e2e/offline_inference/test_t2i_model.py -m "advanced_model and diffusion" --run-level "advanced_model"

- label: "Diffusion Images API LoRA E2E"
agent_pool: mi325_1
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- timeout 20m pytest -s -v tests/e2e/online_serving/test_images_generations_lora.py

- label: "Diffusion Model CPU offloading Test"
agent_pool: mi325_1
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export GPU_ARCHS=gfx942
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- |
timeout 20m bash -c '
set +e
pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py
EXIT1=\$?
pytest -s -v tests/e2e/offline_inference/test_diffusion_layerwise_offload.py
EXIT2=\$?
exit \$((EXIT1 | EXIT2))
'

## Disabled until the upstream `diffusers` issue is resolved: https://github.com/huggingface/diffusers/issues/13274
# - label: "Audio Generation Model Test"
# agent_pool: mi325_1
# depends_on: amd-build
# mirror_hardwares: [amdproduction]
# grade: Blocking
# commands:
# - export GPU_ARCHS=gfx942
# - export VLLM_LOGGING_LEVEL=DEBUG
# - export VLLM_WORKER_MULTIPROC_METHOD=spawn
# - timeout 20m pytest -s -v tests/e2e/offline_inference/test_stable_audio_model.py

- label: "Diffusion Cache Backend Test"
agent_pool: mi325_1
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export GPU_ARCHS=gfx942
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- timeout 15m pytest -s -v -m "core_model and cache and diffusion and not distributed_cuda and L4"

- label: "Diffusion Sequence Parallelism Test"
agent_pool: mi325_2
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- timeout 20m pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py

# merge-only tests
- label: "Diffusion Tensor Parallelism Test"
agent_pool: mi325_2
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
# NOTE(review): unlike every other step in this file, this command is not wrapped in `timeout` — confirm that is intentional.
- pytest -s -v tests/e2e/offline_inference/test_zimage_parallelism.py

- label: "Diffusion GPU Worker Test"
agent_pool: mi325_2
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- timeout 20m pytest -s -v tests/diffusion/test_diffusion_worker.py

- label: "Benchmark & Engine Test"
agent_pool: mi325_2
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- |
timeout 20m bash -c '
set +e
pytest -s -v tests/benchmarks/test_serve_cli.py
EXIT1=\$?
pytest -s -v tests/engine/test_async_omni_engine_abort.py
EXIT2=\$?
exit \$((EXIT1 | EXIT2))
'

- label: "Omni Model Test Qwen2-5-Omni"
agent_pool: mi325_2
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- timeout 20m pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py

- label: "Omni Model Test Qwen3-Omni"
agent_pool: mi325_2
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export VLLM_TEST_CLEAN_GPU_MEMORY=1
- timeout 10m pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py
- timeout 20m pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py -m "advanced_model" --run-level "advanced_model"

- label: "Qwen3-TTS E2E Test"
agent_pool: mi325_2
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- timeout 20m pytest -s -v tests/e2e/online_serving/test_qwen3_tts.py

- label: "Diffusion Image Edit Test"
agent_pool: mi325_1
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export GPU_ARCHS=gfx942
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- timeout 20m pytest -s -v tests/e2e/online_serving/test_image_gen_edit.py

# The "Bagel Model Test with H100 (Real Weights)" suite is split into the three steps below.
- label: "Bagel Text2Img Model Test"
agent_pool: mi325_1
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export GPU_ARCHS=gfx942
- export VLLM_TEST_CLEAN_GPU_MEMORY=1
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export VLLM_ROCM_USE_AITER_RMSNORM=0
# NOTE(review): `-k` is passed twice below; pytest appears to honor only the last one, so "shared_memory" may be ignored — confirm whether -k "shared_memory and rocm" was intended.
- timeout 30m pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py -m "advanced_model" --run-level "advanced_model" -k "shared_memory" -k "rocm"

- label: "Bagel Img2Img Model Test"
agent_pool: mi325_1
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export GPU_ARCHS=gfx942
- export VLLM_TEST_CLEAN_GPU_MEMORY=1
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export VLLM_ROCM_USE_AITER_RMSNORM=0
- timeout 30m pytest -s -v tests/e2e/offline_inference/test_bagel_img2img.py -m "advanced_model" --run-level "advanced_model" -k "rocm"

- label: "Bagel Online Serving Test"
agent_pool: mi325_1
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export GPU_ARCHS=gfx942
- export VLLM_TEST_CLEAN_GPU_MEMORY=1
- export VLLM_IMAGE_FETCH_TIMEOUT=60
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export VLLM_ROCM_USE_AITER_RMSNORM=0
- timeout 40m pytest -s -v tests/e2e/online_serving/test_bagel_online.py -m "advanced_model" --run-level "advanced_model" -k "rocm"
Loading
Loading