From fb3648040ecc9cb289ca6ef7261b5030511fcf75 Mon Sep 17 00:00:00 2001 From: Lewis Tunstall Date: Thu, 15 May 2025 09:59:01 +0000 Subject: [PATCH 1/4] Fix revision arg for vLLM tokenizer --- src/lighteval/models/vllm/vllm_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py index 40352b4da..e539b926f 100644 --- a/src/lighteval/models/vllm/vllm_model.py +++ b/src/lighteval/models/vllm/vllm_model.py @@ -204,7 +204,7 @@ def _create_auto_tokenizer(self, config: VLLMModelConfig): config.model_name, tokenizer_mode="auto", trust_remote_code=config.trust_remote_code, - tokenizer_revision=config.revision, + revision=config.revision, ) tokenizer.pad_token = tokenizer.eos_token return tokenizer From a2e522c012f6689639b7707b5240e23629010b45 Mon Sep 17 00:00:00 2001 From: Lewis Tunstall Date: Thu, 15 May 2025 10:11:28 +0000 Subject: [PATCH 2/4] Add unit test --- tests/models/vllm/test_vllm_model.py | 40 ++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 tests/models/vllm/test_vllm_model.py diff --git a/tests/models/vllm/test_vllm_model.py b/tests/models/vllm/test_vllm_model.py new file mode 100644 index 000000000..89b0eda7a --- /dev/null +++ b/tests/models/vllm/test_vllm_model.py @@ -0,0 +1,40 @@ +# MIT License + +# Copyright (c) 2025 The HuggingFace Team + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import unittest + +from transformers import AutoTokenizer + +from lighteval.models.vllm.vllm_model import VLLMModel, VLLMModelConfig + + +class TestVLLMTokenizerCreation(unittest.TestCase): + def test_tokenizer_created_with_correct_revision(self): + config = VLLMModelConfig( + model_name="lewtun/different-chat-templates-per-revision", revision="new_chat_template" + ) + vllm_tokenizer = VLLMModel.__new__(VLLMModel)._create_auto_tokenizer(config) + tokenizer = AutoTokenizer.from_pretrained( + config.model_name, + revision=config.revision, + ) + self.assertEqual(vllm_tokenizer.chat_template, tokenizer.chat_template) From c8425d04e42dbe7d4698d6460f21a04f48b05964 Mon Sep 17 00:00:00 2001 From: Lewis Tunstall Date: Thu, 15 May 2025 10:15:44 +0000 Subject: [PATCH 3/4] Update test --- tests/models/vllm/test_vllm_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/vllm/test_vllm_model.py b/tests/models/vllm/test_vllm_model.py index 89b0eda7a..9eb43fc69 100644 --- a/tests/models/vllm/test_vllm_model.py +++ b/tests/models/vllm/test_vllm_model.py @@ -30,7 +30,7 @@ class TestVLLMTokenizerCreation(unittest.TestCase): def test_tokenizer_created_with_correct_revision(self): config = VLLMModelConfig( - model_name="lewtun/different-chat-templates-per-revision", revision="new_chat_template" + model_name="lighteval-internal-testing/different-chat-templates-per-revision", revision="new_chat_template" ) vllm_tokenizer = VLLMModel.__new__(VLLMModel)._create_auto_tokenizer(config) tokenizer = AutoTokenizer.from_pretrained( From 61f62f4eb41db3b2ea77dd11dae2b8007b18e1a9 Mon Sep 17 00:00:00 2001 From: Lewis Tunstall Date: Thu, 15 May 2025 12:09:29 +0000 Subject: [PATCH 4/4] Move test repo --- tests/models/vllm/test_vllm_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/vllm/test_vllm_model.py b/tests/models/vllm/test_vllm_model.py index 9eb43fc69..986f41cb9 100644 --- a/tests/models/vllm/test_vllm_model.py +++ b/tests/models/vllm/test_vllm_model.py @@ -30,7 +30,7 @@ class TestVLLMTokenizerCreation(unittest.TestCase): def test_tokenizer_created_with_correct_revision(self): config = VLLMModelConfig( - model_name="lighteval-internal-testing/different-chat-templates-per-revision", revision="new_chat_template" + model_name="lighteval/different-chat-templates-per-revision", revision="new_chat_template" ) vllm_tokenizer = VLLMModel.__new__(VLLMModel)._create_auto_tokenizer(config) tokenizer = AutoTokenizer.from_pretrained(