Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions vllm/tokenizers/hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,16 @@

from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast

from vllm.logger import init_logger
from vllm.transformers_utils.config import get_sentence_transformer_tokenizer_config
from vllm.transformers_utils.gguf_utils import extract_eos_token_id_from_gguf

from .protocol import TokenizerLike

HfTokenizer: TypeAlias = PreTrainedTokenizer | PreTrainedTokenizerFast

logger = init_logger(__name__)


def get_cached_tokenizer(tokenizer: HfTokenizer) -> HfTokenizer:
"""
Expand Down Expand Up @@ -75,6 +79,9 @@ def from_pretrained(
download_dir: str | None = None,
**kwargs,
) -> HfTokenizer:
# Save gguf_file before AutoTokenizer.from_pretrained() pops it from kwargs
gguf_file = kwargs.get("gguf_file")

try:
tokenizer = AutoTokenizer.from_pretrained(
path_or_repo_id,
Expand Down Expand Up @@ -116,4 +123,23 @@ def from_pretrained(
}
tokenizer.add_special_tokens(special_tokens_map)

# Patch EOS token ID from GGUF metadata if available
# GGUF files may have a different EOS token ID than HF tokenizer config
# (e.g., Gemma uses <end_of_turn> ID 106 as EOS, but HF reports <eos> ID 1)
# Note: gguf_file was saved above before
# AutoTokenizer.from_pretrained() popped it
if gguf_file:
gguf_path = Path(path_or_repo_id) / gguf_file
gguf_eos_id = extract_eos_token_id_from_gguf(str(gguf_path))
if gguf_eos_id is not None:
hf_eos_id = tokenizer.eos_token_id
if hf_eos_id != gguf_eos_id:
logger.info(
"Patching tokenizer eos_token_id from %d to %d "
"(using GGUF metadata)",
hf_eos_id,
gguf_eos_id,
)
tokenizer.eos_token_id = gguf_eos_id

return get_cached_tokenizer(tokenizer)
40 changes: 40 additions & 0 deletions vllm/transformers_utils/gguf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,46 @@ def extract_vision_config_from_gguf(mmproj_path: str) -> "SiglipVisionConfig | N
return config


def extract_eos_token_id_from_gguf(model: str) -> int | None:
    """Read the EOS token ID stored in a GGUF file's metadata.

    The value comes from the ``tokenizer.ggml.eos_token_id`` field and can
    disagree with the HuggingFace tokenizer config (e.g., Gemma models
    use <end_of_turn> token ID 106 as EOS in GGUF, but HF tokenizer reports
    <eos> token ID 1).

    Args:
        model: Path to GGUF model file

    Returns:
        EOS token ID from GGUF metadata, or None if not found
    """
    # Note: We don't check for .gguf extension here because HuggingFace Hub
    # stores GGUF files as blob hashes without extensions. The caller is
    # responsible for ensuring this is a valid GGUF file (via check_gguf_file).
    try:
        path = Path(model)
        if not path.is_file():
            return None

        field = gguf.GGUFReader(str(path)).get_field(Keys.Tokenizer.EOS_ID)
        if field is None:
            return None

        # The scalar value lives in the last element of the field's parts.
        token_id = int(field.parts[-1][0])
        logger.debug(
            "Extracted eos_token_id=%d from GGUF metadata",
            token_id,
        )
        return token_id

    except Exception as e:
        # Best-effort: any parse/read failure just means "no override".
        logger.debug("Error extracting EOS token ID from GGUF: %s", e)
        return None


def maybe_patch_hf_config_from_gguf(
model: str,
hf_config: PretrainedConfig,
Expand Down