diff --git a/requirements/test.in b/requirements/test.in
index 3be580db0674..c5d2c4cd4c30 100644
--- a/requirements/test.in
+++ b/requirements/test.in
@@ -34,7 +34,7 @@ num2words # required for smolvlm test
 opencv-python-headless >= 4.11.0 # required for video test
 datamodel_code_generator # required for minicpm3 test
 lm-eval[api]==0.4.8 # required for model evaluation test
-transformers==4.51.1
+transformers==4.51.3
 tokenizers==0.21.1
 huggingface-hub[hf_xet]>=0.30.0 # Required for Xet downloads.
 schemathesis>=3.39.15 # Required for openai schema test.
diff --git a/requirements/test.txt b/requirements/test.txt
index 6dcd4ff01460..9642a5bfe68d 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -737,7 +737,7 @@ tqdm==4.66.6
     #   transformers
 tqdm-multiprocess==0.0.11
     # via lm-eval
-transformers==4.51.1
+transformers==4.51.3
     # via
     #   -r requirements/test.in
     #   genai-perf
diff --git a/tests/models/decoder_only/language/test_models.py b/tests/models/decoder_only/language/test_models.py
index 79fa3fa99773..85714b85e7eb 100644
--- a/tests/models/decoder_only/language/test_models.py
+++ b/tests/models/decoder_only/language/test_models.py
@@ -9,6 +9,7 @@
 
 from vllm.platforms import current_platform
 
+from ...registry import HF_EXAMPLE_MODELS
 from ...utils import check_logprobs_close
 
 # These have unsupported head_dim for FA. We do not
@@ -33,54 +34,50 @@
 # @maybe_test_rocm_aiter
 @pytest.mark.parametrize(
-    "model",
+    "model_arch",
     [
         pytest.param(
-            "bigscience/bloom-560m",  # bloom - testing alibi slopes
+            "BloomForCausalLM",  # testing alibi slopes
            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "openai-community/gpt2",  # gpt2
+            "GPT2LMHeadModel",  # gpt2
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
-        pytest.param("Milos/slovak-gpt-j-405M"),  # gptj
-        pytest.param("bigcode/tiny_starcoder_py"),  # gpt_bigcode
-        pytest.param("EleutherAI/pythia-70m"),  # gpt_neox
+        pytest.param("GPTJForCausalLM"),
+        pytest.param("GPTBigCodeForCausalLM"),
+        pytest.param("GPTNeoXForCausalLM"),
         pytest.param(
-            "google/gemma-1.1-2b-it",  # gemma
+            "GemmaForCausalLM",  # gemma
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
+        pytest.param("GlmForCausalLM"),
         pytest.param(
-            "THUDM/chatglm3-6b",  # chatglm (text-only)
-        ),
-        pytest.param(
-            "meta-llama/Llama-3.2-1B-Instruct",  # llama
+            "LlamaForCausalLM",
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "openbmb/MiniCPM3-4B",
+            "MiniCPM3ForCausalLM",
             # fused_moe not supported on CPU
             marks=[pytest.mark.core_model],
         ),
         pytest.param(
-            "facebook/opt-125m",  # opt
+            "OPTForCausalLM",
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "microsoft/phi-2",  # phi
+            "PhiForCausalLM",
             marks=[pytest.mark.core_model],
         ),
+        pytest.param("QWenLMHeadModel", ),
         pytest.param(
-            "Qwen/Qwen-7B",  # qwen (text-only)
-        ),
-        pytest.param(
-            "Qwen/Qwen2.5-0.5B-Instruct",  # qwen2
+            "Qwen2ForCausalLM",
             marks=[pytest.mark.core_model],
         ),
-        pytest.param("stabilityai/stablelm-3b-4e1t"),  # stablelm
-        pytest.param("bigcode/starcoder2-3b"),  # starcoder2
+        pytest.param("StableLmForCausalLM"),
+        pytest.param("Starcoder2ForCausalLM"),
         pytest.param(
-            "ehristoforu/Falcon3-MoE-2x7B-Insruct",  # mixtral
+            "MixtralForCausalLM",
             marks=[pytest.mark.cpu_model],
         )
     ])
@@ -89,10 +86,12 @@
 @pytest.mark.parametrize("num_logprobs", [5])
 @pytest.mark.parametrize(
     "use_rocm_aiter", [True, False] if current_platform.is_rocm() else [False])
-def test_models(hf_runner, vllm_runner, example_prompts, model: str,
+def test_models(hf_runner, vllm_runner, example_prompts, model_arch: str,
                 dtype: str, max_tokens: int, num_logprobs: int,
                 use_rocm_aiter: bool, monkeypatch) -> None:
+    model = HF_EXAMPLE_MODELS.get_hf_info(model_arch).default
+
     if model in REQUIRES_V0:
         monkeypatch.setenv("VLLM_USE_V1", "0")
diff --git a/tests/models/registry.py b/tests/models/registry.py
index c15ae3619844..6b1ec64115e3 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -123,7 +123,8 @@ def check_available_online(
     "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B"),
     "BloomForCausalLM": _HfExamplesInfo("bigscience/bloomz-1b1"),
     "ChatGLMModel": _HfExamplesInfo("THUDM/chatglm3-6b",
-                                    trust_remote_code=True),
+                                    trust_remote_code=True,
+                                    max_transformers_version="4.51.1"),
     "ChatGLMForConditionalGeneration": _HfExamplesInfo("thu-coai/ShieldLM-6B-chatglm3",  # noqa: E501
                                                        trust_remote_code=True),
     "CohereForCausalLM": _HfExamplesInfo("CohereForAI/c4ai-command-r-v01",
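
A minimal sketch of the registry pattern this diff relies on: tests now parametrize on the architecture name and resolve the actual checkpoint through the shared registry, while a max_transformers_version cap can gate out architectures whose remote code breaks on a newer transformers release (here ChatGLMModel on 4.51.3). The field layout and the check_transformers_version helper below are assumptions inferred from the hunks above, not vLLM's actual implementation:

# Hypothetical sketch; names mirror tests/models/registry.py, bodies are assumed.
from dataclasses import dataclass
from typing import Optional

import pytest
from packaging.version import Version
from transformers import __version__ as TRANSFORMERS_VERSION


@dataclass(frozen=True)
class _HfExamplesInfo:
    default: str  # default HF checkpoint for this architecture
    trust_remote_code: bool = False
    max_transformers_version: Optional[str] = None  # last known-good version

    def check_transformers_version(self) -> None:
        # Skip (rather than fail) when the installed transformers is newer
        # than the last version this model is known to work with.
        cap = self.max_transformers_version
        if cap is not None and Version(TRANSFORMERS_VERSION) > Version(cap):
            pytest.skip(f"{self.default} requires transformers<={cap}")


class HfExampleModels:
    def __init__(self, registry: dict[str, _HfExamplesInfo]) -> None:
        self._registry = registry

    def get_hf_info(self, model_arch: str) -> _HfExamplesInfo:
        return self._registry[model_arch]


HF_EXAMPLE_MODELS = HfExampleModels({
    # With transformers==4.51.3 pinned in requirements, this entry is skipped.
    "ChatGLMModel": _HfExamplesInfo("THUDM/chatglm3-6b",
                                    trust_remote_code=True,
                                    max_transformers_version="4.51.1"),
    "LlamaForCausalLM": _HfExamplesInfo("meta-llama/Llama-3.2-1B-Instruct"),
})

# A test then resolves its checkpoint exactly as in the diff:
#     model = HF_EXAMPLE_MODELS.get_hf_info(model_arch).default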