From 70197930a5c546dfe3324c42061724ae93361142 Mon Sep 17 00:00:00 2001 From: Yida Wu Date: Wed, 11 Jun 2025 19:56:11 +0000 Subject: [PATCH 1/5] fix amd multi step test Signed-off-by: Yida Wu --- tests/multi_step/test_correctness_llm.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/multi_step/test_correctness_llm.py b/tests/multi_step/test_correctness_llm.py index 9f1b3bbe8e22..800cc7adf32f 100644 --- a/tests/multi_step/test_correctness_llm.py +++ b/tests/multi_step/test_correctness_llm.py @@ -9,6 +9,7 @@ import pytest from vllm.utils import STR_BACKEND_ENV_VAR +from vllm.platforms import current_platform from ..models.utils import check_logprobs_close, check_outputs_equal @@ -71,6 +72,10 @@ def test_multi_step_llm( num_logprobs: corresponds to the `logprobs` argument to the OpenAI completions endpoint; `None` -> 1 logprob returned. """ + if current_platform.is_rocm() and \ + (attention_backend=="FLASHINFER" or enable_chunked_prefill==True): + pytest.skip("Multi-Step + Chunked-Prefill not supported on ROCm") + with monkeypatch.context() as m: m.setenv(STR_BACKEND_ENV_VAR, attention_backend) @@ -221,6 +226,8 @@ def test_multi_step_llm_w_prompt_logprobs( @pytest.mark.parametrize("num_prompts", NUM_PROMPTS) @pytest.mark.parametrize("num_logprobs", [None, 5]) @pytest.mark.parametrize("attention_backend", ["FLASH_ATTN"]) +@pytest.mark.skipif(current_platform.is_rocm(), + reason="Multi-Step + Chunked-Prefill not supported on ROCm") def test_multi_step_llm_chunked_prefill_prefix_cache( vllm_runner, example_prompts, From 0e3eff0d2b5f085010bf83090159c7186b60df53 Mon Sep 17 00:00:00 2001 From: Yida Wu Date: Wed, 11 Jun 2025 20:26:34 +0000 Subject: [PATCH 2/5] enable multi-step production test Signed-off-by: Yida Wu --- .buildkite/test-pipeline.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index b739851cb905..1392f0aed01e 100644 --- a/.buildkite/test-pipeline.yaml +++ 
b/.buildkite/test-pipeline.yaml @@ -669,7 +669,7 @@ steps: - pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins - label: Multi-step Tests (4 GPUs) # 36min - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] working_dir: "/vllm-workspace/tests" num_gpus: 4 source_file_dependencies: From 1f4a3f049844eeeee0ce9f6149f2faecf32c933b Mon Sep 17 00:00:00 2001 From: Concurrensee Date: Wed, 11 Jun 2025 17:13:57 -0500 Subject: [PATCH 3/5] Update tests/multi_step/test_correctness_llm.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Signed-off-by: Yida Wu --- tests/multi_step/test_correctness_llm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/multi_step/test_correctness_llm.py b/tests/multi_step/test_correctness_llm.py index 800cc7adf32f..83dfea6b9205 100644 --- a/tests/multi_step/test_correctness_llm.py +++ b/tests/multi_step/test_correctness_llm.py @@ -73,8 +73,8 @@ def test_multi_step_llm( completions endpoint; `None` -> 1 logprob returned. 
""" if current_platform.is_rocm() and \ - (attention_backend=="FLASHINFER" or enable_chunked_prefill==True): - pytest.skip("Multi-Step + Chunked-Prefill not supported on ROCm") + (attention_backend == "FLASHINFER" or enable_chunked_prefill): + pytest.skip("Multi-Step with FLASHINFER or Chunked-Prefill is not supported on ROCm") with monkeypatch.context() as m: m.setenv(STR_BACKEND_ENV_VAR, attention_backend) From f4623bfd9549a20d407fef2fb1bd5c0d84c2f83a Mon Sep 17 00:00:00 2001 From: Yida Wu Date: Wed, 11 Jun 2025 22:57:57 +0000 Subject: [PATCH 4/5] formatting Signed-off-by: Yida Wu --- tests/multi_step/test_correctness_llm.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/multi_step/test_correctness_llm.py b/tests/multi_step/test_correctness_llm.py index 83dfea6b9205..37ad4f2ed302 100644 --- a/tests/multi_step/test_correctness_llm.py +++ b/tests/multi_step/test_correctness_llm.py @@ -8,8 +8,8 @@ import pytest -from vllm.utils import STR_BACKEND_ENV_VAR from vllm.platforms import current_platform +from vllm.utils import STR_BACKEND_ENV_VAR from ..models.utils import check_logprobs_close, check_outputs_equal @@ -74,7 +74,9 @@ def test_multi_step_llm( """ if current_platform.is_rocm() and \ (attention_backend == "FLASHINFER" or enable_chunked_prefill): - pytest.skip("Multi-Step with FLASHINFER or Chunked-Prefill is not supported on ROCm") + pytest.skip( + "Multi-Step with FLASHINFER or Chunked-Prefill is notsupported" + "on ROCm") with monkeypatch.context() as m: m.setenv(STR_BACKEND_ENV_VAR, attention_backend) @@ -226,8 +228,9 @@ def test_multi_step_llm_w_prompt_logprobs( @pytest.mark.parametrize("num_prompts", NUM_PROMPTS) @pytest.mark.parametrize("num_logprobs", [None, 5]) @pytest.mark.parametrize("attention_backend", ["FLASH_ATTN"]) -@pytest.mark.skipif(current_platform.is_rocm(), - reason="Multi-Step + Chunked-Prefill not supported on ROCm") +@pytest.mark.skipif( + current_platform.is_rocm(), + reason="Multi-Step + 
Chunked-Prefill not supported on ROCm") def test_multi_step_llm_chunked_prefill_prefix_cache( vllm_runner, example_prompts, From f8a3cd4467a1ee27797878b1123b9c365cd0e77b Mon Sep 17 00:00:00 2001 From: Concurrensee Date: Fri, 13 Jun 2025 12:32:07 -0500 Subject: [PATCH 5/5] Update tests/multi_step/test_correctness_llm.py Co-authored-by: Cyrus Leung Signed-off-by: Yida Wu --- tests/multi_step/test_correctness_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/multi_step/test_correctness_llm.py b/tests/multi_step/test_correctness_llm.py index 37ad4f2ed302..0df00c98b72c 100644 --- a/tests/multi_step/test_correctness_llm.py +++ b/tests/multi_step/test_correctness_llm.py @@ -75,7 +75,7 @@ def test_multi_step_llm( if current_platform.is_rocm() and \ (attention_backend == "FLASHINFER" or enable_chunked_prefill): pytest.skip( - "Multi-Step with FLASHINFER or Chunked-Prefill is notsupported" + "Multi-Step with FLASHINFER or Chunked-Prefill is not supported " "on ROCm") with monkeypatch.context() as m: