diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh index 5f7440a805c..f86b4b5d958 100755 --- a/.buildkite/scripts/hardware_ci/run-amd-test.sh +++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh @@ -116,6 +116,9 @@ if [[ $commands == *"--shard-id="* ]]; then --shm-size=16gb \ --group-add "$render_gid" \ --rm \ + -e MIOPEN_DEBUG_CONV_DIRECT=0 \ + -e MIOPEN_DEBUG_CONV_GEMM=0 \ + -e VLLM_ROCM_USE_AITER=1 \ -e HIP_VISIBLE_DEVICES="${GPU}" \ -e HF_TOKEN \ -e AWS_ACCESS_KEY_ID \ @@ -148,6 +151,9 @@ else --shm-size=16gb \ --group-add "$render_gid" \ --rm \ + -e MIOPEN_DEBUG_CONV_DIRECT=0 \ + -e MIOPEN_DEBUG_CONV_GEMM=0 \ + -e VLLM_ROCM_USE_AITER=1 \ -e HF_TOKEN \ -e AWS_ACCESS_KEY_ID \ -e AWS_SECRET_ACCESS_KEY \ diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index 08a3f7a1c34..930ef7a409e 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -8,14 +8,32 @@ steps: grade: Blocking commands: - export GPU_ARCHS=gfx942 - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 - - export VLLM_ROCM_USE_AITER=1 - - export VLLM_ROCM_USE_AITER_MHA=1 - - export VLLM_ROCM_USE_AITER_LINEAR=0 - - export VLLM_ROCM_USE_AITER_RMSNORM=0 - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py +- label: "Diffusion Images API LoRA E2E" + timeout_in_minutes: 20 + agent_pool: mi325_1 + depends_on: amd-build + mirror_hardwares: [amdproduction] + grade: Blocking + commands: + - export GPU_ARCHS=gfx942 + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - pytest -s -v tests/e2e/online_serving/test_images_generations_lora.py + +- label: "Diffusion Model CPU offloading Test" + timeout_in_minutes: 20 + agent_pool: mi325_1 + depends_on: amd-build + mirror_hardwares: [amdproduction] + grade: Blocking + commands: + - export GPU_ARCHS=gfx942 + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py + - label: "Diffusion Cache Backend Test" timeout_in_minutes: 15 agent_pool: mi325_1 @@ -26,25 +44,30 @@ steps: - export GPU_ARCHS=gfx942 - export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 - - export VLLM_ROCM_USE_AITER=1 - - export VLLM_ROCM_USE_AITER_MHA=1 - - export VLLM_ROCM_USE_AITER_LINEAR=0 - - export VLLM_ROCM_USE_AITER_RMSNORM=0 - pytest -s -v tests/e2e/offline_inference/test_cache_dit.py tests/e2e/offline_inference/test_teacache.py -- label: "Diffusion Parallelism Test" - timeout_in_minutes: 15 +- label: "Diffusion Sequence Parallelism Test" + timeout_in_minutes: 20 agent_pool: mi325_2 depends_on: amd-build mirror_hardwares: [amdproduction] grade: Blocking commands: - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 + - export GPU_ARCHS=gfx942 + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py +- label: "Diffusion Tensor Parallelism Test" + timeout_in_minutes: 20 + agent_pool: mi325_2 + depends_on: amd-build + commands: + - export GPU_ARCHS=gfx942 + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - pytest -s -v tests/e2e/offline_inference/test_zimage_tensor_parallel.py + - label: "Diffusion GPU Worker Test" timeout_in_minutes: 20 agent_pool: mi325_2 @@ -52,8 +75,6 @@ steps: mirror_hardwares: [amdproduction] grade: Blocking commands: - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 - pytest -s -v tests/diffusion/test_diffusion_worker.py - label: "Omni Model Test Qwen2-5-Omni" @@ -66,12 +87,6 @@ steps: - export GPU_ARCHS=gfx942 - export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 - - export VLLM_ROCM_USE_AITER=1 - - export VLLM_ROCM_USE_AITER_MHA=1 - - export VLLM_ROCM_USE_AITER_LINEAR=0 - - export VLLM_ROCM_USE_AITER_RMSNORM=0 - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py - label: "Omni Model Test Qwen3-Omni" @@ -83,9 +98,10 @@ steps: commands: - export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 - - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py tests/e2e/online_serving/test_qwen3_omni.py + - export VLLM_TEST_CLEAN_GPU_MEMORY="1" + - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py + - pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py + - pytest -s -v tests/e2e/online_serving/test_async_omni.py - label: "Diffusion Image Edit Test" timeout_in_minutes: 15 @@ -97,10 +113,4 @@ steps: - export GPU_ARCHS=gfx942 - export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 - - export VLLM_ROCM_USE_AITER=1 - - export VLLM_ROCM_USE_AITER_MHA=1 - - export VLLM_ROCM_USE_AITER_LINEAR=0 - - export VLLM_ROCM_USE_AITER_RMSNORM=0 - pytest -s -v tests/e2e/online_serving/test_i2i_multi_image_input.py