diff --git a/vllm/tokenizers/hf.py b/vllm/tokenizers/hf.py
index a7b565dca5d8..596b3bdbc623 100644
--- a/vllm/tokenizers/hf.py
+++ b/vllm/tokenizers/hf.py
@@ -7,12 +7,16 @@
 from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
 
+from vllm.logger import init_logger
 from vllm.transformers_utils.config import get_sentence_transformer_tokenizer_config
+from vllm.transformers_utils.gguf_utils import extract_eos_token_id_from_gguf
 
 from .protocol import TokenizerLike
 
 HfTokenizer: TypeAlias = PreTrainedTokenizer | PreTrainedTokenizerFast
 
+logger = init_logger(__name__)
+
 
 def get_cached_tokenizer(tokenizer: HfTokenizer) -> HfTokenizer:
     """
@@ -75,6 +79,9 @@ def from_pretrained(
         download_dir: str | None = None,
         **kwargs,
     ) -> HfTokenizer:
+        # Save gguf_file before AutoTokenizer.from_pretrained() pops it from kwargs
+        gguf_file = kwargs.get("gguf_file")
+
         try:
             tokenizer = AutoTokenizer.from_pretrained(
                 path_or_repo_id,
@@ -116,4 +123,23 @@
             }
             tokenizer.add_special_tokens(special_tokens_map)
 
+        # Patch EOS token ID from GGUF metadata if available
+        # GGUF files may have a different EOS token ID than HF tokenizer config
+        # (e.g., Gemma uses ID 106 as EOS, but HF reports ID 1)
+        # Note: gguf_file was saved above before
+        # AutoTokenizer.from_pretrained() popped it
+        if gguf_file:
+            gguf_path = Path(path_or_repo_id) / gguf_file
+            gguf_eos_id = extract_eos_token_id_from_gguf(str(gguf_path))
+            if gguf_eos_id is not None:
+                hf_eos_id = tokenizer.eos_token_id
+                if hf_eos_id != gguf_eos_id:
+                    logger.info(
+                        "Patching tokenizer eos_token_id from %d to %d "
+                        "(using GGUF metadata)",
+                        hf_eos_id,
+                        gguf_eos_id,
+                    )
+                    tokenizer.eos_token_id = gguf_eos_id
+
         return get_cached_tokenizer(tokenizer)
diff --git a/vllm/transformers_utils/gguf_utils.py b/vllm/transformers_utils/gguf_utils.py
index 3faa5ee60e9f..321011628678 100644
--- a/vllm/transformers_utils/gguf_utils.py
+++ b/vllm/transformers_utils/gguf_utils.py
@@ -220,6 +220,46 @@ def extract_vision_config_from_gguf(mmproj_path: str) -> "SiglipVisionConfig | N
     return config
 
 
+def extract_eos_token_id_from_gguf(model: str) -> int | None:
+    """Extract EOS token ID from GGUF metadata.
+
+    GGUF files store the EOS token ID in tokenizer.ggml.eos_token_id field.
+    This may differ from HuggingFace's tokenizer config (e.g., Gemma models
+    use token ID 106 as EOS in GGUF, but HF tokenizer reports
+    token ID 1).
+
+    Args:
+        model: Path to GGUF model file
+
+    Returns:
+        EOS token ID from GGUF metadata, or None if not found
+    """
+    # Note: We don't check for .gguf extension here because HuggingFace Hub
+    # stores GGUF files as blob hashes without extensions. The caller is
+    # responsible for ensuring this is a valid GGUF file (via check_gguf_file).
+    try:
+        model_path = Path(model)
+        if not model_path.is_file():
+            return None
+
+        reader = gguf.GGUFReader(str(model_path))
+
+        eos_field = reader.get_field(Keys.Tokenizer.EOS_ID)
+        if eos_field is not None:
+            eos_token_id = int(eos_field.parts[-1][0])
+            logger.debug(
+                "Extracted eos_token_id=%d from GGUF metadata",
+                eos_token_id,
+            )
+            return eos_token_id
+
+        return None
+
+    except Exception as e:
+        logger.debug("Error extracting EOS token ID from GGUF: %s", e)
+        return None
+
+
 def maybe_patch_hf_config_from_gguf(
     model: str,
     hf_config: PretrainedConfig,