Skip to content

Commit d6a65ca

Browse files
authored
Support local GGUF in VLLM and use HF tokenizer #943 (#972)
* Support local GGUF in VLLM and use HF tokenizer #943
* Improve the readability of the implementation
1 parent 16318bb commit d6a65ca

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

src/lighteval/models/vllm/vllm_model.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ class VLLMModelConfig(ModelConfig):
8585
Attributes:
8686
model_name (str):
8787
HuggingFace Hub model ID or path to the model to load.
88+
tokenizer (str | None):
89+
HuggingFace Hub model ID or path to the tokenizer to load.
8890
revision (str):
8991
Git revision of the model. Defaults to "main".
9092
dtype (str):
@@ -150,6 +152,7 @@ class VLLMModelConfig(ModelConfig):
150152
"""
151153

152154
model_name: str
155+
tokenizer: str | None = None
153156
revision: str = "main" # revision of the model
154157
dtype: str = "bfloat16"
155158
tensor_parallel_size: PositiveInt = 1 # how many GPUs to use for tensor parallelism
@@ -289,7 +292,7 @@ def _create_auto_model(self, config: VLLMModelConfig) -> Optional[LLM]:
289292

290293
def _create_auto_tokenizer(self, config: VLLMModelConfig):
291294
tokenizer = get_tokenizer(
292-
config.model_name,
295+
config.tokenizer or config.model_name, # use HF tokenizer for non-HF models, like GGUF model.
293296
tokenizer_mode="auto",
294297
trust_remote_code=config.trust_remote_code,
295298
revision=config.revision,

0 commit comments

Comments (0)