From 664428c6de82689006d65cfaf0eb46e457680514 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Wed, 28 Jan 2026 10:48:34 +0000 Subject: [PATCH 1/4] enable test Signed-off-by: tjtanaa --- .buildkite/test-amd.yaml | 77 +++++++++++++++++---- tests/e2e/online_serving/test_qwen3_omni.py | 2 - 2 files changed, 62 insertions(+), 17 deletions(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index 86d65f15bcf..a570182466c 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -11,11 +11,47 @@ steps: - export MIOPEN_DEBUG_CONV_DIRECT=0 - export MIOPEN_DEBUG_CONV_GEMM=0 - export VLLM_ROCM_USE_AITER=1 - - export VLLM_ROCM_USE_AITER_MHA=1 - - export VLLM_ROCM_USE_AITER_LINEAR=0 - - export VLLM_ROCM_USE_AITER_RMSNORM=0 - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py +- label: "Diffusion Images API LoRA E2E" + timeout_in_minutes: 20 + agent_pool: mi325_1 + depends_on: amd-build + mirror_hardwares: [amdproduction] + grade: Blocking + commands: + - export GPU_ARCHS=gfx942 + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - export MIOPEN_DEBUG_CONV_DIRECT=0 + - export MIOPEN_DEBUG_CONV_GEMM=0 + - export VLLM_ROCM_USE_AITER=1 + - pytest -s -v tests/e2e/online_serving/test_images_generations_lora.py + +- label: "Diffusion Model CPU offloading Test" + timeout_in_minutes: 20 + agent_pool: mi325_1 + depends_on: amd-build + mirror_hardwares: [amdproduction] + grade: Blocking + commands: + - export GPU_ARCHS=gfx942 + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - export MIOPEN_DEBUG_CONV_DIRECT=0 + - export MIOPEN_DEBUG_CONV_GEMM=0 + - export VLLM_ROCM_USE_AITER=1 + - pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py + +- label: "Audio Generation Model Test" + timeout_in_minutes: 20 + agent_pool: mi325_1 + depends_on: amd-build + mirror_hardwares: [amdproduction] + grade: Blocking + commands: + - pytest -s -v tests/e2e/offline_inference/test_stable_audio_model.py + - label: "Diffusion Cache Backend Test" timeout_in_minutes: 15 agent_pool: mi325_1 @@ -29,22 +65,36 @@ steps: - export MIOPEN_DEBUG_CONV_DIRECT=0 - export MIOPEN_DEBUG_CONV_GEMM=0 - export VLLM_ROCM_USE_AITER=1 - - export VLLM_ROCM_USE_AITER_MHA=1 - - export VLLM_ROCM_USE_AITER_LINEAR=0 - - export VLLM_ROCM_USE_AITER_RMSNORM=0 - pytest -s -v tests/e2e/offline_inference/test_cache_dit.py tests/e2e/offline_inference/test_teacache.py -- label: "Diffusion Parallelism Test" - timeout_in_minutes: 15 +- label: "Diffusion Sequence Parallelism Test" + timeout_in_minutes: 20 agent_pool: mi325_2 depends_on: amd-build mirror_hardwares: [amdproduction] grade: Blocking commands: + - export GPU_ARCHS=gfx942 + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn - export MIOPEN_DEBUG_CONV_DIRECT=0 - export MIOPEN_DEBUG_CONV_GEMM=0 + - export VLLM_ROCM_USE_AITER=1 - pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py +- label: "Diffusion Tensor Parallelism Test" + timeout_in_minutes: 20 + agent_pool: mi325_2 + depends_on: amd-build + commands: + - export GPU_ARCHS=gfx942 + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - export MIOPEN_DEBUG_CONV_DIRECT=0 + - export MIOPEN_DEBUG_CONV_GEMM=0 + - export VLLM_ROCM_USE_AITER=1 + - pytest -s -v tests/e2e/offline_inference/test_zimage_tensor_parallel.py + - label: "Diffusion GPU Worker Test" timeout_in_minutes: 20 agent_pool: mi325_2 @@ -69,9 +119,6 @@ steps: - export MIOPEN_DEBUG_CONV_DIRECT=0 - export MIOPEN_DEBUG_CONV_GEMM=0 - export VLLM_ROCM_USE_AITER=1 - - export VLLM_ROCM_USE_AITER_MHA=1 - - export VLLM_ROCM_USE_AITER_LINEAR=0 - - export VLLM_ROCM_USE_AITER_RMSNORM=0 - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py - label: "Omni Model Test Qwen3-Omni" @@ -85,7 +132,10 @@ steps: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - export MIOPEN_DEBUG_CONV_DIRECT=0 - export MIOPEN_DEBUG_CONV_GEMM=0 - - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py tests/e2e/online_serving/test_qwen3_omni.py + - export VLLM_TEST_CLEAN_GPU_MEMORY="1" + - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py + - pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py + - pytest -s -v tests/e2e/online_serving/test_async_omni.py - label: "Diffusion Image Edit Test" timeout_in_minutes: 15 @@ -100,7 +150,4 @@ steps: - export MIOPEN_DEBUG_CONV_DIRECT=0 - export MIOPEN_DEBUG_CONV_GEMM=0 - export VLLM_ROCM_USE_AITER=1 - - export VLLM_ROCM_USE_AITER_MHA=1 - - export VLLM_ROCM_USE_AITER_LINEAR=0 - - export VLLM_ROCM_USE_AITER_RMSNORM=0 - pytest -s -v tests/e2e/online_serving/test_i2i_multi_image_input.py diff --git a/tests/e2e/online_serving/test_qwen3_omni.py b/tests/e2e/online_serving/test_qwen3_omni.py index 8564104a03f..c591f9bb329 100644 --- a/tests/e2e/online_serving/test_qwen3_omni.py +++ b/tests/e2e/online_serving/test_qwen3_omni.py @@ -144,7 +144,6 @@ def get_max_batch_size(size_type="few"): return batch_sizes.get(size_type, 5) -@pytest.mark.skipif(is_rocm(), reason="Test skipped on AMD environment due to known output issues") @pytest.mark.parametrize("omni_server", test_params, indirect=True) def test_mix_to_text_audio_001(client: openai.OpenAI, omni_server, request) -> None: """ @@ -224,7 +223,6 @@ def test_mix_to_text_audio_001(client: openai.OpenAI, omni_server, request) -> N assert similarity > 0.9, "The audio content is not same as the text" -@pytest.mark.skipif(is_rocm(), reason="Test skipped on AMD environment due to known output issues") @pytest.mark.parametrize("omni_server", test_params, indirect=True) def test_text_to_text_audio_001(client: openai.OpenAI, omni_server) -> None: """ From 8f40f68ae26269bcebba364d10078e077d60a163 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Wed, 28 Jan 2026 15:39:46 +0000 Subject: [PATCH 2/4] fix the rocm yaml Signed-off-by: tjtanaa --- .../online_serving/stage_configs/rocm/qwen3_omni_ci.yaml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/e2e/online_serving/stage_configs/rocm/qwen3_omni_ci.yaml b/tests/e2e/online_serving/stage_configs/rocm/qwen3_omni_ci.yaml index ef0bae0fadc..d8157881246 100644 --- a/tests/e2e/online_serving/stage_configs/rocm/qwen3_omni_ci.yaml +++ b/tests/e2e/online_serving/stage_configs/rocm/qwen3_omni_ci.yaml @@ -22,7 +22,6 @@ stage_args: enable_prefix_caching: false hf_config_name: thinker_config tensor_parallel_size: 2 - load_format: dummy final_output: true final_output_type: text is_comprehension: true @@ -52,7 +51,6 @@ stage_args: enable_prefix_caching: false distributed_executor_backend: "mp" hf_config_name: talker_config - load_format: dummy engine_input_source: [0] custom_process_input_func: vllm_omni.model_executor.stage_input_processors.qwen3_omni.thinker2talker # final_output: true @@ -60,7 +58,7 @@ stage_args: default_sampling_params: temperature: 0.9 top_k: 50 - max_tokens: 100 + max_tokens: 1000 seed: 42 detokenize: False repetition_penalty: 1.05 @@ -83,7 +81,6 @@ stage_args: distributed_executor_backend: "mp" max_num_batched_tokens: 1000000 hf_config_name: thinker_config - load_format: dummy async_scheduling: false engine_input_source: [1] custom_process_input_func: vllm_omni.model_executor.stage_input_processors.qwen3_omni.talker2code2wav @@ -93,7 +90,7 @@ stage_args: temperature: 0.0 top_p: 1.0 top_k: -1 - max_tokens: 200 + max_tokens: 2000 seed: 42 detokenize: True repetition_penalty: 1.1 From aa887f4ca096afd1a2bed2fa28bdbae66b14d4d5 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Wed, 28 Jan 2026 15:41:21 +0000 Subject: [PATCH 3/4] only run the two tests for debugging Signed-off-by: tjtanaa --- .buildkite/test-amd.yaml | 236 +++++++++++++++++++-------------------- 1 file changed, 118 insertions(+), 118 deletions(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index a570182466c..7462662166a 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -1,47 +1,47 @@ steps: -- label: "Diffusion Model Test" - timeout_in_minutes: 20 - agent_pool: mi325_2 - depends_on: amd-build - mirror_hardwares: [amdproduction] - grade: Blocking - commands: - - export GPU_ARCHS=gfx942 - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 - - export VLLM_ROCM_USE_AITER=1 - - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py +# - label: "Diffusion Model Test" +# timeout_in_minutes: 20 +# agent_pool: mi325_2 +# depends_on: amd-build +# mirror_hardwares: [amdproduction] +# grade: Blocking +# commands: +# - export GPU_ARCHS=gfx942 +# - export MIOPEN_DEBUG_CONV_DIRECT=0 +# - export MIOPEN_DEBUG_CONV_GEMM=0 +# - export VLLM_ROCM_USE_AITER=1 +# - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py -- label: "Diffusion Images API LoRA E2E" - timeout_in_minutes: 20 - agent_pool: mi325_1 - depends_on: amd-build - mirror_hardwares: [amdproduction] - grade: Blocking - commands: - - export GPU_ARCHS=gfx942 - - export VLLM_LOGGING_LEVEL=DEBUG - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 - - export VLLM_ROCM_USE_AITER=1 - - pytest -s -v tests/e2e/online_serving/test_images_generations_lora.py +# - label: "Diffusion Images API LoRA E2E" +# timeout_in_minutes: 20 +# agent_pool: mi325_1 +# depends_on: amd-build +# mirror_hardwares: [amdproduction] +# grade: Blocking +# commands: +# - export GPU_ARCHS=gfx942 +# - export VLLM_LOGGING_LEVEL=DEBUG +# - export VLLM_WORKER_MULTIPROC_METHOD=spawn +# - export MIOPEN_DEBUG_CONV_DIRECT=0 +# - export MIOPEN_DEBUG_CONV_GEMM=0 +# - export VLLM_ROCM_USE_AITER=1 +# - pytest -s -v tests/e2e/online_serving/test_images_generations_lora.py -- label: "Diffusion Model CPU offloading Test" - timeout_in_minutes: 20 - agent_pool: mi325_1 - depends_on: amd-build - mirror_hardwares: [amdproduction] - grade: Blocking - commands: - - export GPU_ARCHS=gfx942 - - export VLLM_LOGGING_LEVEL=DEBUG - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 - - export VLLM_ROCM_USE_AITER=1 - - pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py +# - label: "Diffusion Model CPU offloading Test" +# timeout_in_minutes: 20 +# agent_pool: mi325_1 +# depends_on: amd-build +# mirror_hardwares: [amdproduction] +# grade: Blocking +# commands: +# - export GPU_ARCHS=gfx942 +# - export VLLM_LOGGING_LEVEL=DEBUG +# - export VLLM_WORKER_MULTIPROC_METHOD=spawn +# - export MIOPEN_DEBUG_CONV_DIRECT=0 +# - export MIOPEN_DEBUG_CONV_GEMM=0 +# - export VLLM_ROCM_USE_AITER=1 +# - pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py - label: "Audio Generation Model Test" timeout_in_minutes: 20 @@ -52,74 +52,74 @@ steps: commands: - pytest -s -v tests/e2e/offline_inference/test_stable_audio_model.py -- label: "Diffusion Cache Backend Test" - timeout_in_minutes: 15 - agent_pool: mi325_1 - depends_on: amd-build - mirror_hardwares: [amdproduction] - grade: Blocking - commands: - - export GPU_ARCHS=gfx942 - - export VLLM_LOGGING_LEVEL=DEBUG - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 - - export VLLM_ROCM_USE_AITER=1 - - pytest -s -v tests/e2e/offline_inference/test_cache_dit.py tests/e2e/offline_inference/test_teacache.py +# - label: "Diffusion Cache Backend Test" +# timeout_in_minutes: 15 +# agent_pool: mi325_1 +# depends_on: amd-build +# mirror_hardwares: [amdproduction] +# grade: Blocking +# commands: +# - export GPU_ARCHS=gfx942 +# - export VLLM_LOGGING_LEVEL=DEBUG +# - export VLLM_WORKER_MULTIPROC_METHOD=spawn +# - export MIOPEN_DEBUG_CONV_DIRECT=0 +# - export MIOPEN_DEBUG_CONV_GEMM=0 +# - export VLLM_ROCM_USE_AITER=1 +# - pytest -s -v tests/e2e/offline_inference/test_cache_dit.py tests/e2e/offline_inference/test_teacache.py -- label: "Diffusion Sequence Parallelism Test" - timeout_in_minutes: 20 - agent_pool: mi325_2 - depends_on: amd-build - mirror_hardwares: [amdproduction] - grade: Blocking - commands: - - export GPU_ARCHS=gfx942 - - export VLLM_LOGGING_LEVEL=DEBUG - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 - - export VLLM_ROCM_USE_AITER=1 - - pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py +# - label: "Diffusion Sequence Parallelism Test" +# timeout_in_minutes: 20 +# agent_pool: mi325_2 +# depends_on: amd-build +# mirror_hardwares: [amdproduction] +# grade: Blocking +# commands: +# - export GPU_ARCHS=gfx942 +# - export VLLM_LOGGING_LEVEL=DEBUG +# - export VLLM_WORKER_MULTIPROC_METHOD=spawn +# - export MIOPEN_DEBUG_CONV_DIRECT=0 +# - export MIOPEN_DEBUG_CONV_GEMM=0 +# - export VLLM_ROCM_USE_AITER=1 +# - pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py -- label: "Diffusion Tensor Parallelism Test" - timeout_in_minutes: 20 - agent_pool: mi325_2 - depends_on: amd-build - commands: - - export GPU_ARCHS=gfx942 - - export VLLM_LOGGING_LEVEL=DEBUG - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 - - export VLLM_ROCM_USE_AITER=1 - - pytest -s -v tests/e2e/offline_inference/test_zimage_tensor_parallel.py +# - label: "Diffusion Tensor Parallelism Test" +# timeout_in_minutes: 20 +# agent_pool: mi325_2 +# depends_on: amd-build +# commands: +# - export GPU_ARCHS=gfx942 +# - export VLLM_LOGGING_LEVEL=DEBUG +# - export VLLM_WORKER_MULTIPROC_METHOD=spawn +# - export MIOPEN_DEBUG_CONV_DIRECT=0 +# - export MIOPEN_DEBUG_CONV_GEMM=0 +# - export VLLM_ROCM_USE_AITER=1 +# - pytest -s -v tests/e2e/offline_inference/test_zimage_tensor_parallel.py -- label: "Diffusion GPU Worker Test" - timeout_in_minutes: 20 - agent_pool: mi325_2 - depends_on: amd-build - mirror_hardwares: [amdproduction] - grade: Blocking - commands: - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 - - pytest -s -v tests/diffusion/test_gpu_diffusion_worker.py +# - label: "Diffusion GPU Worker Test" +# timeout_in_minutes: 20 +# agent_pool: mi325_2 +# depends_on: amd-build +# mirror_hardwares: [amdproduction] +# grade: Blocking +# commands: +# - export MIOPEN_DEBUG_CONV_DIRECT=0 +# - export MIOPEN_DEBUG_CONV_GEMM=0 +# - pytest -s -v tests/diffusion/test_gpu_diffusion_worker.py -- label: "Omni Model Test Qwen2-5-Omni" - timeout_in_minutes: 15 - agent_pool: mi325_2 - depends_on: amd-build - mirror_hardwares: [amdproduction] - grade: Blocking - commands: - - export GPU_ARCHS=gfx942 - - export VLLM_LOGGING_LEVEL=DEBUG - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 - - export VLLM_ROCM_USE_AITER=1 - - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py +# - label: "Omni Model Test Qwen2-5-Omni" +# timeout_in_minutes: 15 +# agent_pool: mi325_2 +# depends_on: amd-build +# mirror_hardwares: [amdproduction] +# grade: Blocking +# commands: +# - export GPU_ARCHS=gfx942 +# - export VLLM_LOGGING_LEVEL=DEBUG +# - export VLLM_WORKER_MULTIPROC_METHOD=spawn +# - export MIOPEN_DEBUG_CONV_DIRECT=0 +# - export MIOPEN_DEBUG_CONV_GEMM=0 +# - export VLLM_ROCM_USE_AITER=1 +# - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py - label: "Omni Model Test Qwen3-Omni" timeout_in_minutes: 15 @@ -137,17 +137,17 @@ steps: - pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py - pytest -s -v tests/e2e/online_serving/test_async_omni.py -- label: "Diffusion Image Edit Test" - timeout_in_minutes: 15 - agent_pool: mi325_1 - depends_on: amd-build - mirror_hardwares: [amdproduction] - grade: Blocking - commands: - - export GPU_ARCHS=gfx942 - - export VLLM_LOGGING_LEVEL=DEBUG - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 - - export VLLM_ROCM_USE_AITER=1 - - pytest -s -v tests/e2e/online_serving/test_i2i_multi_image_input.py +# - label: "Diffusion Image Edit Test" +# timeout_in_minutes: 15 +# agent_pool: mi325_1 +# depends_on: amd-build +# mirror_hardwares: [amdproduction] +# grade: Blocking +# commands: +# - export GPU_ARCHS=gfx942 +# - export VLLM_LOGGING_LEVEL=DEBUG +# - export VLLM_WORKER_MULTIPROC_METHOD=spawn +# - export MIOPEN_DEBUG_CONV_DIRECT=0 +# - export MIOPEN_DEBUG_CONV_GEMM=0 +# - export VLLM_ROCM_USE_AITER=1 +# - pytest -s -v tests/e2e/online_serving/test_i2i_multi_image_input.py From bdb2342b82fd59c1daa6ffe5f761dc725521f11c Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Wed, 28 Jan 2026 16:35:57 +0000 Subject: [PATCH 4/4] enable all tests and make aiter enabled default Signed-off-by: tjtanaa --- .../scripts/hardware_ci/run-amd-test.sh | 6 + .buildkite/test-amd.yaml | 209 +++++++----------- 2 files changed, 92 insertions(+), 123 deletions(-) diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh index 5f7440a805c..f86b4b5d958 100755 --- a/.buildkite/scripts/hardware_ci/run-amd-test.sh +++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh @@ -116,6 +116,9 @@ if [[ $commands == *"--shard-id="* ]]; then --shm-size=16gb \ --group-add "$render_gid" \ --rm \ + -e MIOPEN_DEBUG_CONV_DIRECT=0 \ + -e MIOPEN_DEBUG_CONV_GEMM=0 \ + -e VLLM_ROCM_USE_AITER=1 \ -e HIP_VISIBLE_DEVICES="${GPU}" \ -e HF_TOKEN \ -e AWS_ACCESS_KEY_ID \ @@ -148,6 +151,9 @@ else --shm-size=16gb \ --group-add "$render_gid" \ --rm \ + -e MIOPEN_DEBUG_CONV_DIRECT=0 \ + -e MIOPEN_DEBUG_CONV_GEMM=0 \ + -e VLLM_ROCM_USE_AITER=1 \ -e HF_TOKEN \ -e AWS_ACCESS_KEY_ID \ -e AWS_SECRET_ACCESS_KEY \ diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index 7462662166a..7a91c829806 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -1,125 +1,93 @@ steps: -# - label: "Diffusion Model Test" -# timeout_in_minutes: 20 -# agent_pool: mi325_2 -# depends_on: amd-build -# mirror_hardwares: [amdproduction] -# grade: Blocking -# commands: -# - export GPU_ARCHS=gfx942 -# - export MIOPEN_DEBUG_CONV_DIRECT=0 -# - export MIOPEN_DEBUG_CONV_GEMM=0 -# - export VLLM_ROCM_USE_AITER=1 -# - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py +- label: "Diffusion Model Test" + timeout_in_minutes: 20 + agent_pool: mi325_2 + depends_on: amd-build + mirror_hardwares: [amdproduction] + grade: Blocking + commands: + - export GPU_ARCHS=gfx942 + - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py -# - label: "Diffusion Images API LoRA E2E" -# timeout_in_minutes: 20 -# agent_pool: mi325_1 -# depends_on: amd-build -# mirror_hardwares: [amdproduction] -# grade: Blocking -# commands: -# - export GPU_ARCHS=gfx942 -# - export VLLM_LOGGING_LEVEL=DEBUG -# - export VLLM_WORKER_MULTIPROC_METHOD=spawn -# - export MIOPEN_DEBUG_CONV_DIRECT=0 -# - export MIOPEN_DEBUG_CONV_GEMM=0 -# - export VLLM_ROCM_USE_AITER=1 -# - pytest -s -v tests/e2e/online_serving/test_images_generations_lora.py +- label: "Diffusion Images API LoRA E2E" + timeout_in_minutes: 20 + agent_pool: mi325_1 + depends_on: amd-build + mirror_hardwares: [amdproduction] + grade: Blocking + commands: + - export GPU_ARCHS=gfx942 + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - pytest -s -v tests/e2e/online_serving/test_images_generations_lora.py -# - label: "Diffusion Model CPU offloading Test" -# timeout_in_minutes: 20 -# agent_pool: mi325_1 -# depends_on: amd-build -# mirror_hardwares: [amdproduction] -# grade: Blocking -# commands: -# - export GPU_ARCHS=gfx942 -# - export VLLM_LOGGING_LEVEL=DEBUG -# - export VLLM_WORKER_MULTIPROC_METHOD=spawn -# - export MIOPEN_DEBUG_CONV_DIRECT=0 -# - export MIOPEN_DEBUG_CONV_GEMM=0 -# - export VLLM_ROCM_USE_AITER=1 -# - pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py - -- label: "Audio Generation Model Test" +- label: "Diffusion Model CPU offloading Test" timeout_in_minutes: 20 agent_pool: mi325_1 depends_on: amd-build mirror_hardwares: [amdproduction] grade: Blocking commands: - - pytest -s -v tests/e2e/offline_inference/test_stable_audio_model.py + - export GPU_ARCHS=gfx942 + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py -# - label: "Diffusion Cache Backend Test" -# timeout_in_minutes: 15 -# agent_pool: mi325_1 -# depends_on: amd-build -# mirror_hardwares: [amdproduction] -# grade: Blocking -# commands: -# - export GPU_ARCHS=gfx942 -# - export VLLM_LOGGING_LEVEL=DEBUG -# - export VLLM_WORKER_MULTIPROC_METHOD=spawn -# - export MIOPEN_DEBUG_CONV_DIRECT=0 -# - export MIOPEN_DEBUG_CONV_GEMM=0 -# - export VLLM_ROCM_USE_AITER=1 -# - pytest -s -v tests/e2e/offline_inference/test_cache_dit.py tests/e2e/offline_inference/test_teacache.py +- label: "Diffusion Cache Backend Test" + timeout_in_minutes: 15 + agent_pool: mi325_1 + depends_on: amd-build + mirror_hardwares: [amdproduction] + grade: Blocking + commands: + - export GPU_ARCHS=gfx942 + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - pytest -s -v tests/e2e/offline_inference/test_cache_dit.py tests/e2e/offline_inference/test_teacache.py -# - label: "Diffusion Sequence Parallelism Test" -# timeout_in_minutes: 20 -# agent_pool: mi325_2 -# depends_on: amd-build -# mirror_hardwares: [amdproduction] -# grade: Blocking -# commands: -# - export GPU_ARCHS=gfx942 -# - export VLLM_LOGGING_LEVEL=DEBUG -# - export VLLM_WORKER_MULTIPROC_METHOD=spawn -# - export MIOPEN_DEBUG_CONV_DIRECT=0 -# - export MIOPEN_DEBUG_CONV_GEMM=0 -# - export VLLM_ROCM_USE_AITER=1 -# - pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py +- label: "Diffusion Sequence Parallelism Test" + timeout_in_minutes: 20 + agent_pool: mi325_2 + depends_on: amd-build + mirror_hardwares: [amdproduction] + grade: Blocking + commands: + - export GPU_ARCHS=gfx942 + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py -# - label: "Diffusion Tensor Parallelism Test" -# timeout_in_minutes: 20 -# agent_pool: mi325_2 -# depends_on: amd-build -# commands: -# - export GPU_ARCHS=gfx942 -# - export VLLM_LOGGING_LEVEL=DEBUG -# - export VLLM_WORKER_MULTIPROC_METHOD=spawn -# - export MIOPEN_DEBUG_CONV_DIRECT=0 -# - export MIOPEN_DEBUG_CONV_GEMM=0 -# - export VLLM_ROCM_USE_AITER=1 -# - pytest -s -v tests/e2e/offline_inference/test_zimage_tensor_parallel.py +- label: "Diffusion Tensor Parallelism Test" + timeout_in_minutes: 20 + agent_pool: mi325_2 + depends_on: amd-build + commands: + - export GPU_ARCHS=gfx942 + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - pytest -s -v tests/e2e/offline_inference/test_zimage_tensor_parallel.py -# - label: "Diffusion GPU Worker Test" -# timeout_in_minutes: 20 -# agent_pool: mi325_2 -# depends_on: amd-build -# mirror_hardwares: [amdproduction] -# grade: Blocking -# commands: -# - export MIOPEN_DEBUG_CONV_DIRECT=0 -# - export MIOPEN_DEBUG_CONV_GEMM=0 -# - pytest -s -v tests/diffusion/test_gpu_diffusion_worker.py +- label: "Diffusion GPU Worker Test" + timeout_in_minutes: 20 + agent_pool: mi325_2 + depends_on: amd-build + mirror_hardwares: [amdproduction] + grade: Blocking + commands: + - pytest -s -v tests/diffusion/test_gpu_diffusion_worker.py -# - label: "Omni Model Test Qwen2-5-Omni" -# timeout_in_minutes: 15 -# agent_pool: mi325_2 -# depends_on: amd-build -# mirror_hardwares: [amdproduction] -# grade: Blocking -# commands: -# - export GPU_ARCHS=gfx942 -# - export VLLM_LOGGING_LEVEL=DEBUG -# - export VLLM_WORKER_MULTIPROC_METHOD=spawn -# - export MIOPEN_DEBUG_CONV_DIRECT=0 -# - export MIOPEN_DEBUG_CONV_GEMM=0 -# - export VLLM_ROCM_USE_AITER=1 -# - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py +- label: "Omni Model Test Qwen2-5-Omni" + timeout_in_minutes: 15 + agent_pool: mi325_2 + depends_on: amd-build + mirror_hardwares: [amdproduction] + grade: Blocking + commands: + - export GPU_ARCHS=gfx942 + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py - label: "Omni Model Test Qwen3-Omni" timeout_in_minutes: 15 @@ -130,24 +98,19 @@ steps: commands: - export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - export MIOPEN_DEBUG_CONV_DIRECT=0 - - export MIOPEN_DEBUG_CONV_GEMM=0 - export VLLM_TEST_CLEAN_GPU_MEMORY="1" - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py - pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py - pytest -s -v tests/e2e/online_serving/test_async_omni.py -# - label: "Diffusion Image Edit Test" -# timeout_in_minutes: 15 -# agent_pool: mi325_1 -# depends_on: amd-build -# mirror_hardwares: [amdproduction] -# grade: Blocking -# commands: -# - export GPU_ARCHS=gfx942 -# - export VLLM_LOGGING_LEVEL=DEBUG -# - export VLLM_WORKER_MULTIPROC_METHOD=spawn -# - export MIOPEN_DEBUG_CONV_DIRECT=0 -# - export MIOPEN_DEBUG_CONV_GEMM=0 -# - export VLLM_ROCM_USE_AITER=1 -# - pytest -s -v tests/e2e/online_serving/test_i2i_multi_image_input.py +- label: "Diffusion Image Edit Test" + timeout_in_minutes: 15 + agent_pool: mi325_1 + depends_on: amd-build + mirror_hardwares: [amdproduction] + grade: Blocking + commands: + - export GPU_ARCHS=gfx942 + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - pytest -s -v tests/e2e/online_serving/test_i2i_multi_image_input.py