From 4b6d44691cbc51d8d5b5482de7f3d17b4ecf73df Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Mon, 12 May 2025 14:42:53 -0500 Subject: [PATCH 1/8] Adding new test. Signed-off-by: Alexei V. Ivanov --- .buildkite/test-pipeline.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 9664615be85d..6900efdcf937 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -703,6 +703,7 @@ steps: - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models.txt - label: Weight Loading Multiple GPU Test - Large Models # optional + mirror_hardwares: [amdexperimental] working_dir: "/vllm-workspace/tests" num_gpus: 2 gpu: a100 From b067a92aab530712251e93fc81a56cd3529d39aa Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Tue, 13 May 2025 23:38:09 +0000 Subject: [PATCH 2/8] Adding "Basic Models Test" and "Multi-Modal Models Test (Extended) 3" Signed-off-by: Alexei V. Ivanov --- .buildkite/scripts/hardware_ci/run-amd-test.sh | 8 ++++++++ .buildkite/test-pipeline.yaml | 6 +++--- requirements/rocm-test.txt | 6 ++++++ tests/models/test_transformers.py | 6 ++++-- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh index 97dcc42312f6..bbc896ec6819 100755 --- a/.buildkite/scripts/hardware_ci/run-amd-test.sh +++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh @@ -82,6 +82,14 @@ if [[ $commands == *"pytest -v -s basic_correctness/test_basic_correctness.py"* commands=${commands//"pytest -v -s basic_correctness/test_basic_correctness.py"/"VLLM_USE_TRITON_FLASH_ATTN=0 pytest -v -s basic_correctness/test_basic_correctness.py"} fi +if [[ $commands == *"pytest -v -s models/test_registry.py"* ]]; then + commands=${commands//"pytest -v -s models/test_registry.py"/"pytest -v -s models/test_registry.py -k 'not BambaForCausalLM and not GritLM and not Mamba2ForCausalLM and not Zamba2ForCausalLM'"} +fi + +if [[ $commands == *"VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'"* ]]; then + commands=${commands//"VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'"/"VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2 and not BambaForCausalLM and not Gemma2ForCausalLM and not Grok1ModelForCausalLM and not Zamba2ForCausalLM and not Gemma2Model and not GritLM'"} +fi + if [[ $commands == *"pytest -v -s compile/test_basic_correctness.py"* ]]; then commands=${commands//"pytest -v -s compile/test_basic_correctness.py"/"VLLM_USE_TRITON_FLASH_ATTN=0 pytest -v -s compile/test_basic_correctness.py"} fi diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index da5db189f70a..e39649048fae 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -455,7 +455,7 @@ steps: ##### models test ##### - label: Basic Models Test # 24min - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] torch_nightly: true source_file_dependencies: - vllm/ @@ -527,7 +527,7 @@ steps: - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=0) and not core_model' - label: Multi-Modal Models Test (Extended) 3 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] optional: true source_file_dependencies: - vllm/ @@ -537,7 +537,7 @@ steps: - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model' - label: Quantized Models Test - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] source_file_dependencies: - vllm/model_executor/layers/quantization - tests/models/quantization diff --git a/requirements/rocm-test.txt b/requirements/rocm-test.txt index abd4212c6e35..25f950a99ece 100644 --- a/requirements/rocm-test.txt +++ b/requirements/rocm-test.txt @@ -22,4 +22,10 @@ decord==0.6.0 #sentence-transformers # required by entrypoints/openai/test_score.py sentence-transformers==3.4.1 +# Basic Models Test +matplotlib==3.10.3 + +# Multi-Modal Models Test (Extended) 3 +blobfile==3.0.0 + diff --git a/tests/models/test_transformers.py b/tests/models/test_transformers.py index 6da488897be5..7ad179ed876b 100644 --- a/tests/models/test_transformers.py +++ b/tests/models/test_transformers.py @@ -32,7 +32,8 @@ def check_implementation( name_1="vllm", ) - +@pytest.mark.skipif(current_platform.is_rocm() + reason="Llama-3.2-1B-Instruct, Ilama-3.2-1B produce memory access fault.") @pytest.mark.parametrize( "model,model_impl", [ @@ -63,7 +64,8 @@ def test_distributed( check_implementation(hf_runner, vllm_runner, example_prompts, "meta-llama/Llama-3.2-1B-Instruct", **kwargs) - +@pytest.mark.skipif(current_platform.is_rocm() + reason="bitsandbytes quantization is currently not supported in rocm.") @pytest.mark.parametrize("model, quantization_kwargs", [ ( "meta-llama/Llama-3.2-1B-Instruct", From 3e769743654f934a04acebb6636336c64b4a1c41 Mon Sep 17 00:00:00 2001 From: Alexei-V-Ivanov-AMD <156011006+Alexei-V-Ivanov-AMD@users.noreply.github.com> Date: Tue, 13 May 2025 23:00:46 -0500 Subject: [PATCH 3/8] Update tests/models/test_transformers.py Signed-off-by: Alexei V. Ivanov Co-authored-by: Cyrus Leung --- tests/models/test_transformers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_transformers.py b/tests/models/test_transformers.py index 7ad179ed876b..e277637e2cc1 100644 --- a/tests/models/test_transformers.py +++ b/tests/models/test_transformers.py @@ -32,7 +32,7 @@ def check_implementation( name_1="vllm", ) -@pytest.mark.skipif(current_platform.is_rocm() +@pytest.mark.skipif(current_platform.is_rocm()) reason="Llama-3.2-1B-Instruct, Ilama-3.2-1B produce memory access fault.") @pytest.mark.parametrize( "model,model_impl", From 1458f4c1fba442f125237964bd83141940f91b14 Mon Sep 17 00:00:00 2001 From: Alexei-V-Ivanov-AMD <156011006+Alexei-V-Ivanov-AMD@users.noreply.github.com> Date: Tue, 13 May 2025 23:00:56 -0500 Subject: [PATCH 4/8] Update tests/models/test_transformers.py Signed-off-by: Alexei V. Ivanov Co-authored-by: Cyrus Leung --- tests/models/test_transformers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_transformers.py b/tests/models/test_transformers.py index e277637e2cc1..3ce95cad1bb2 100644 --- a/tests/models/test_transformers.py +++ b/tests/models/test_transformers.py @@ -64,7 +64,7 @@ def test_distributed( check_implementation(hf_runner, vllm_runner, example_prompts, "meta-llama/Llama-3.2-1B-Instruct", **kwargs) -@pytest.mark.skipif(current_platform.is_rocm() +@pytest.mark.skipif(current_platform.is_rocm(), reason="bitsandbytes quantization is currently not supported in rocm.") @pytest.mark.parametrize("model, quantization_kwargs", [ ( From 283559eefd0228b55c85f9b066820be060297333 Mon Sep 17 00:00:00 2001 From: Alexei-V-Ivanov-AMD <156011006+Alexei-V-Ivanov-AMD@users.noreply.github.com> Date: Tue, 13 May 2025 23:03:46 -0500 Subject: [PATCH 5/8] Update test_transformers.py Signed-off-by: Alexei V. Ivanov --- tests/models/test_transformers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_transformers.py b/tests/models/test_transformers.py index 3ce95cad1bb2..023e10cc5f0b 100644 --- a/tests/models/test_transformers.py +++ b/tests/models/test_transformers.py @@ -32,7 +32,7 @@ def check_implementation( name_1="vllm", ) -@pytest.mark.skipif(current_platform.is_rocm()) +@pytest.mark.skipif(current_platform.is_rocm(), reason="Llama-3.2-1B-Instruct, Ilama-3.2-1B produce memory access fault.") @pytest.mark.parametrize( "model,model_impl", From 94c458003d178baa52b8ae130c5db951d6368c48 Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Wed, 14 May 2025 22:12:24 +0000 Subject: [PATCH 6/8] Fixed imports. Signed-off-by: Alexei V. Ivanov --- tests/models/test_transformers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/models/test_transformers.py b/tests/models/test_transformers.py index 7ad179ed876b..10fb5ea62136 100644 --- a/tests/models/test_transformers.py +++ b/tests/models/test_transformers.py @@ -1,12 +1,14 @@ # SPDX-License-Identifier: Apache-2.0 """Test the functionality of the Transformers backend.""" import pytest +from vllm.platforms import current_platform from ..conftest import HfRunner, VllmRunner from ..utils import multi_gpu_test from .utils import check_logprobs_close + def check_implementation( hf_runner: type[HfRunner], vllm_runner: type[VllmRunner], @@ -32,7 +34,7 @@ def check_implementation( name_1="vllm", ) -@pytest.mark.skipif(current_platform.is_rocm() +@pytest.mark.skipif(current_platform.is_rocm(), reason="Llama-3.2-1B-Instruct, Ilama-3.2-1B produce memory access fault.") @pytest.mark.parametrize( "model,model_impl", @@ -64,7 +66,7 @@ def test_distributed( check_implementation(hf_runner, vllm_runner, example_prompts, "meta-llama/Llama-3.2-1B-Instruct", **kwargs) -@pytest.mark.skipif(current_platform.is_rocm() +@pytest.mark.skipif(current_platform.is_rocm(), reason="bitsandbytes quantization is currently not supported in rocm.") @pytest.mark.parametrize("model, quantization_kwargs", [ ( From 182dec024f672e2616add7d3b0997bdad049b5f6 Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Thu, 15 May 2025 13:47:40 +0000 Subject: [PATCH 7/8] Fixing pre-commit. Signed-off-by: Alexei V. Ivanov --- requirements/test.txt | 22 ++++++++++++++++++++-- tests/models/test_transformers.py | 10 +++++++--- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/requirements/test.txt b/requirements/test.txt index 9a15d9a0d824..29def9252e2f 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -27,6 +27,10 @@ argcomplete==3.5.1 # via datamodel-code-generator arrow==1.3.0 # via isoduration +async-timeout==5.0.1 + # via + # aiohttp + # redis attrs==24.2.0 # via # aiohttp @@ -126,6 +130,11 @@ encodec==0.1.1 # via vocos evaluate==0.4.3 # via lm-eval +exceptiongroup==1.3.0 + # via + # anyio + # hypothesis + # pytest fastparquet==2024.11.0 # via genai-perf fastrlock==0.8.2 @@ -623,7 +632,6 @@ setuptools==77.0.3 # via # mamba-ssm # pytablewriter - # torch # triton shellingham==1.5.4 # via typer @@ -683,8 +691,13 @@ tokenizers==0.21.1 # via # -r requirements/test.in # transformers +toml==0.10.2 + # via datamodel-code-generator tomli==2.2.1 - # via schemathesis + # via + # black + # pytest + # schemathesis tomli-w==1.2.0 # via schemathesis torch==2.7.0+cu128 @@ -756,12 +769,17 @@ types-python-dateutil==2.9.0.20241206 # via arrow typing-extensions==4.12.2 # via + # anyio + # black + # exceptiongroup # huggingface-hub # librosa # mistral-common + # multidict # pqdm # pydantic # pydantic-core + # rich # torch # typer tzdata==2024.2 diff --git a/tests/models/test_transformers.py b/tests/models/test_transformers.py index 10fb5ea62136..6e38c4c7cadb 100644 --- a/tests/models/test_transformers.py +++ b/tests/models/test_transformers.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 """Test the functionality of the Transformers backend.""" import pytest + from vllm.platforms import current_platform from ..conftest import HfRunner, VllmRunner @@ -8,7 +9,6 @@ from .utils import check_logprobs_close - def check_implementation( hf_runner: type[HfRunner], vllm_runner: type[VllmRunner], @@ -34,7 +34,9 @@ def check_implementation( name_1="vllm", ) -@pytest.mark.skipif(current_platform.is_rocm(), + +@pytest.mark.skipif( + current_platform.is_rocm(), reason="Llama-3.2-1B-Instruct, Ilama-3.2-1B produce memory access fault.") @pytest.mark.parametrize( "model,model_impl", @@ -66,7 +68,9 @@ def test_distributed( check_implementation(hf_runner, vllm_runner, example_prompts, "meta-llama/Llama-3.2-1B-Instruct", **kwargs) -@pytest.mark.skipif(current_platform.is_rocm(), + +@pytest.mark.skipif( + current_platform.is_rocm(), reason="bitsandbytes quantization is currently not supported in rocm.") @pytest.mark.parametrize("model, quantization_kwargs", [ ( From 194711aae85c7529a1f35321f7f925b518c37f03 Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Thu, 15 May 2025 15:29:51 +0000 Subject: [PATCH 8/8] Reverting requirements/test.txt Signed-off-by: Alexei V. Ivanov --- requirements/test.txt | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/requirements/test.txt b/requirements/test.txt index 29def9252e2f..9a15d9a0d824 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -27,10 +27,6 @@ argcomplete==3.5.1 # via datamodel-code-generator arrow==1.3.0 # via isoduration -async-timeout==5.0.1 - # via - # aiohttp - # redis attrs==24.2.0 # via # aiohttp @@ -130,11 +126,6 @@ encodec==0.1.1 # via vocos evaluate==0.4.3 # via lm-eval -exceptiongroup==1.3.0 - # via - # anyio - # hypothesis - # pytest fastparquet==2024.11.0 # via genai-perf fastrlock==0.8.2 @@ -632,6 +623,7 @@ setuptools==77.0.3 # via # mamba-ssm # pytablewriter + # torch # triton shellingham==1.5.4 # via typer @@ -691,13 +683,8 @@ tokenizers==0.21.1 # via # -r requirements/test.in # transformers -toml==0.10.2 - # via datamodel-code-generator tomli==2.2.1 - # via - # black - # pytest - # schemathesis + # via schemathesis tomli-w==1.2.0 # via schemathesis torch==2.7.0+cu128 @@ -769,17 +756,12 @@ types-python-dateutil==2.9.0.20241206 # via arrow typing-extensions==4.12.2 # via - # anyio - # black - # exceptiongroup # huggingface-hub # librosa # mistral-common - # multidict # pqdm # pydantic # pydantic-core - # rich # torch # typer tzdata==2024.2