diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 378c2624f7d9..ca87b3e76b3f 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -1882,6 +1882,39 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
     )
     assert set(resolved_chat_template_kwargs.keys()) == expected_kwargs
 
+
+    # Additional test: Verify HF base parameters work with **kwargs tokenizers
+    # This validates the fix for tokenizers like Kimi K2 that use **kwargs
+    # to receive standard HuggingFace parameters instead of declaring them explicitly
+    from vllm.entrypoints.chat_utils import _get_hf_base_chat_template_params
+
+    hf_base_params = _get_hf_base_chat_template_params()
+    # Verify common HF parameters are in the base class
+    assert {"add_generation_prompt", "tools", "continue_final_message"}.issubset(
+        hf_base_params
+    ), f"Expected HF base params not found in {hf_base_params}"
+
+    # Test with a mock tokenizer that uses **kwargs (like Kimi K2)
+    class MockTokenizerWithKwargs:
+        def apply_chat_template(self, conversation, **kwargs):
+            return "mocked_output"
+
+    mock_tokenizer = MockTokenizerWithKwargs()
+    mock_kwargs = {
+        "add_generation_prompt": True,
+        "tools": tools,
+        "continue_final_message": False,
+        "unknown_param": "should_be_filtered",
+    }
+    resolved_mock = resolve_chat_template_kwargs(
+        mock_tokenizer, chat_template, mock_kwargs, raise_on_unexpected=False
+    )
+    # HF base params should pass through even with **kwargs tokenizer
+    assert "add_generation_prompt" in resolved_mock
+    assert "tools" in resolved_mock
+    assert "continue_final_message" in resolved_mock
+    # Unknown params should be filtered out
+    assert "unknown_param" not in resolved_mock
 
 
 # NOTE: Qwen2-Audio default chat template is specially defined inside
 # processor class instead of using `tokenizer_config.json`
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 4c73e94fb72b..09641aaff306 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import asyncio
+import inspect
 import json
 from abc import ABC, abstractmethod
 from collections import Counter, defaultdict, deque
@@ -1515,6 +1516,24 @@ def _resolve_chat_template_kwargs(
 _cached_resolve_chat_template_kwargs = lru_cache(_resolve_chat_template_kwargs)
 
 
+@lru_cache
+def _get_hf_base_chat_template_params() -> frozenset[str]:
+    # Get standard parameters from HuggingFace's base tokenizer class.
+    # This dynamically extracts parameters from PreTrainedTokenizer's
+    # apply_chat_template method, ensuring compatibility with tokenizers
+    # that use **kwargs to receive standard parameters.
+
+    # Read signature from HF's base class - the single source of truth
+    base_sig = inspect.signature(PreTrainedTokenizer.apply_chat_template)
+    # Exclude *args/**kwargs placeholders and the non-forwardable self/conversation
+    return frozenset(
+        p.name
+        for p in base_sig.parameters.values()
+        if p.kind not in (inspect.Parameter.VAR_KEYWORD, inspect.Parameter.VAR_POSITIONAL)
+        and p.name not in ("self", "conversation")
+    )
+
+
 def resolve_chat_template_kwargs(
     tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast,
     chat_template: str,
@@ -1538,7 +1557,11 @@ def resolve_chat_template_kwargs(
         if supports_kw(tokenizer.apply_chat_template, k, allow_var_kwargs=False)
     }
     template_vars = _cached_resolve_chat_template_kwargs(chat_template)
-    accept_vars = (fn_kw | template_vars) - unexpected_vars
+
+    # Allow standard HF parameters even if tokenizer uses **kwargs to receive them
+    hf_base_params = _get_hf_base_chat_template_params()
+
+    accept_vars = (fn_kw | template_vars | hf_base_params) - unexpected_vars
     return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars}
 
 