Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .buildkite/scripts/hardware_ci/run-amd-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,9 @@ if [[ $commands == *"--shard-id="* ]]; then
--shm-size=16gb \
--group-add "$render_gid" \
--rm \
-e MIOPEN_DEBUG_CONV_DIRECT=0 \
-e MIOPEN_DEBUG_CONV_GEMM=0 \
-e VLLM_ROCM_USE_AITER=1 \
-e HIP_VISIBLE_DEVICES="${GPU}" \
-e HF_TOKEN \
-e AWS_ACCESS_KEY_ID \
Expand Down Expand Up @@ -148,6 +151,9 @@ else
--shm-size=16gb \
--group-add "$render_gid" \
--rm \
-e MIOPEN_DEBUG_CONV_DIRECT=0 \
-e MIOPEN_DEBUG_CONV_GEMM=0 \
-e VLLM_ROCM_USE_AITER=1 \
-e HF_TOKEN \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
Expand Down
76 changes: 43 additions & 33 deletions .buildkite/test-amd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,32 @@ steps:
grade: Blocking
commands:
- export GPU_ARCHS=gfx942
- export MIOPEN_DEBUG_CONV_DIRECT=0
- export MIOPEN_DEBUG_CONV_GEMM=0
- export VLLM_ROCM_USE_AITER=1
- export VLLM_ROCM_USE_AITER_MHA=1
- export VLLM_ROCM_USE_AITER_LINEAR=0
- export VLLM_ROCM_USE_AITER_RMSNORM=0
- pytest -s -v tests/e2e/offline_inference/test_t2i_model.py

- label: "Diffusion Images API LoRA E2E"
timeout_in_minutes: 20
agent_pool: mi325_1
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export GPU_ARCHS=gfx942
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/e2e/online_serving/test_images_generations_lora.py

- label: "Diffusion Model CPU offloading Test"
timeout_in_minutes: 20
agent_pool: mi325_1
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export GPU_ARCHS=gfx942
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py

- label: "Diffusion Cache Backend Test"
timeout_in_minutes: 15
agent_pool: mi325_1
Expand All @@ -26,34 +44,37 @@ steps:
- export GPU_ARCHS=gfx942
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export MIOPEN_DEBUG_CONV_DIRECT=0
- export MIOPEN_DEBUG_CONV_GEMM=0
- export VLLM_ROCM_USE_AITER=1
- export VLLM_ROCM_USE_AITER_MHA=1
- export VLLM_ROCM_USE_AITER_LINEAR=0
- export VLLM_ROCM_USE_AITER_RMSNORM=0
- pytest -s -v tests/e2e/offline_inference/test_cache_dit.py tests/e2e/offline_inference/test_teacache.py

- label: "Diffusion Parallelism Test"
timeout_in_minutes: 15
- label: "Diffusion Sequence Parallelism Test"
timeout_in_minutes: 20
agent_pool: mi325_2
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export MIOPEN_DEBUG_CONV_DIRECT=0
- export MIOPEN_DEBUG_CONV_GEMM=0
- export GPU_ARCHS=gfx942
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py

- label: "Diffusion Tensor Parallelism Test"
timeout_in_minutes: 20
agent_pool: mi325_2
depends_on: amd-build
commands:
Comment on lines +61 to +65
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P2] Ensure the new AMD tensor-parallel test is included

The new "Diffusion Tensor Parallelism Test" step omits `mirror_hardwares`, but the AMD pipeline template only emits a step when `step.mirror_hardwares` is present and contains `mirror_hw` (see `.buildkite/test-template-amd-omni.j2`, lines 33–35). As a result, the step is never added to the generated pipeline and the test won’t run at all. Add `mirror_hardwares: [amdproduction]` (and `grade`, if needed) so that the step is actually executed.

Useful? React with 👍 / 👎.

- export GPU_ARCHS=gfx942
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/e2e/offline_inference/test_zimage_tensor_parallel.py

- label: "Diffusion GPU Worker Test"
timeout_in_minutes: 20
agent_pool: mi325_2
depends_on: amd-build
mirror_hardwares: [amdproduction]
grade: Blocking
commands:
- export MIOPEN_DEBUG_CONV_DIRECT=0
- export MIOPEN_DEBUG_CONV_GEMM=0
- pytest -s -v tests/diffusion/test_diffusion_worker.py

- label: "Omni Model Test Qwen2-5-Omni"
Expand All @@ -66,12 +87,6 @@ steps:
- export GPU_ARCHS=gfx942
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export MIOPEN_DEBUG_CONV_DIRECT=0
- export MIOPEN_DEBUG_CONV_GEMM=0
- export VLLM_ROCM_USE_AITER=1
- export VLLM_ROCM_USE_AITER_MHA=1
- export VLLM_ROCM_USE_AITER_LINEAR=0
- export VLLM_ROCM_USE_AITER_RMSNORM=0
- pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py

- label: "Omni Model Test Qwen3-Omni"
Expand All @@ -83,9 +98,10 @@ steps:
commands:
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export MIOPEN_DEBUG_CONV_DIRECT=0
- export MIOPEN_DEBUG_CONV_GEMM=0
- pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py tests/e2e/online_serving/test_qwen3_omni.py
- export VLLM_TEST_CLEAN_GPU_MEMORY="1"
- pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py
- pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py
- pytest -s -v tests/e2e/online_serving/test_async_omni.py

- label: "Diffusion Image Edit Test"
timeout_in_minutes: 15
Expand All @@ -97,10 +113,4 @@ steps:
- export GPU_ARCHS=gfx942
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export MIOPEN_DEBUG_CONV_DIRECT=0
- export MIOPEN_DEBUG_CONV_GEMM=0
- export VLLM_ROCM_USE_AITER=1
- export VLLM_ROCM_USE_AITER_MHA=1
- export VLLM_ROCM_USE_AITER_LINEAR=0
- export VLLM_ROCM_USE_AITER_RMSNORM=0
- pytest -s -v tests/e2e/online_serving/test_i2i_multi_image_input.py