vllm-project · Isotr0py · Oct 28, 2025 · Oct 28, 2025 · Oct 28, 2025 · Oct 28, 2025
@@ -1882,6 +1882,39 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
     )
     assert set(resolved_chat_template_kwargs.keys()) == expected_kwargs
 
+    # Additional test: Verify HF base parameters work with **kwargs tokenizers
+    # This validates the fix for tokenizers like Kimi K2 that use **kwargs
+    # to receive standard HuggingFace parameters instead of declaring them explicitly
+    from vllm.entrypoints.chat_utils import _get_hf_base_chat_template_params
+
+    hf_base_params = _get_hf_base_chat_template_params()
+    # Verify common HF parameters are in the base class
+    assert {"add_generation_prompt", "tools", "continue_final_message"}.issubset(
+        hf_base_params
+    ), f"Expected HF base params not found in {hf_base_params}"
+
+    # Test with a mock tokenizer that uses **kwargs (like Kimi K2)
+    class MockTokenizerWithKwargs:
+        def apply_chat_template(self, conversation, **kwargs):
+            return "mocked_output"
+
+    mock_tokenizer = MockTokenizerWithKwargs()
+    mock_kwargs = {
+        "add_generation_prompt": True,
+        "tools": tools,
+        "continue_final_message": False,
+        "unknown_param": "should_be_filtered",
+    }
+    resolved_mock = resolve_chat_template_kwargs(
+        mock_tokenizer, chat_template, mock_kwargs, raise_on_unexpected=False
+    )
+    # HF base params should pass through even with **kwargs tokenizer
+    assert "add_generation_prompt" in resolved_mock
+    assert "tools" in resolved_mock
+    assert "continue_final_message" in resolved_mock
+    # Unknown params should be filtered out
+    assert "unknown_param" not in resolved_mock
+
 
 # NOTE: Qwen2-Audio default chat template is specially defined inside
 # processor class instead of using `tokenizer_config.json`

@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import asyncio
+import inspect
 import json
 from abc import ABC, abstractmethod
 from collections import Counter, defaultdict, deque
@@ -1515,6 +1516,24 @@ def _resolve_chat_template_kwargs(
 _cached_resolve_chat_template_kwargs = lru_cache(_resolve_chat_template_kwargs)
 
 
+@lru_cache
+def _get_hf_base_chat_template_params() -> frozenset[str]:
+    # Return the names of the parameters declared by HuggingFace's base
+    # tokenizer method `PreTrainedTokenizer.apply_chat_template`.
+    #
+    # The names are read from the method signature at runtime rather than
+    # hard-coded, so the result follows whatever the installed HF version
+    # declares. The intent is compatibility with tokenizers that use **kwargs
+    # to receive standard parameters instead of declaring them explicitly.
+    #
+    # The function takes no arguments and is wrapped in `lru_cache`, so the
+    # signature is inspected on the first call only; later calls return the
+    # same cached frozenset.
+
+    # Read signature from HF's base class - the single source of truth.
+    # NOTE(review): the method is looked up on the class, not on an instance,
+    # so the receiver parameter (`self`) is presumably part of the signature
+    # and therefore of the returned set - confirm whether that is intended.
+    base_sig = inspect.signature(PreTrainedTokenizer.apply_chat_template)
+    # Keep only named parameters: the VAR_KEYWORD (**kwargs) and
+    # VAR_POSITIONAL (*args) placeholders are catch-alls, not parameter names.
+    return frozenset(
+        p.name
+        for p in base_sig.parameters.values()
+        if p.kind
+        not in (inspect.Parameter.VAR_KEYWORD, inspect.Parameter.VAR_POSITIONAL)
+    )
+
+
 def resolve_chat_template_kwargs(
     tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast,
     chat_template: str,
@@ -1538,7 +1557,11 @@ def resolve_chat_template_kwargs(
         if supports_kw(tokenizer.apply_chat_template, k, allow_var_kwargs=False)
     }
     template_vars = _cached_resolve_chat_template_kwargs(chat_template)
-    accept_vars = (fn_kw | template_vars) - unexpected_vars
+
+    # Allow standard HF parameters even if tokenizer uses **kwargs to receive them
+    hf_base_params = _get_hf_base_chat_template_params()
+
+    accept_vars = (fn_kw | template_vars | hf_base_params) - unexpected_vars
 @pytest.mark.parametrize( 
     "model, expected_kwargs", 
     [ 
         ( 
             QWEN2VL_MODEL_ID, 
             { 
                 "add_vision_id", 
                 "add_generation_prompt", 
                 "continue_final_message", 
                 "tools", 
             }, 
         ), 
         ( 
             QWEN3_MODEL_ID, 
             { 
                 "enable_thinking", 
                 "add_generation_prompt", 
                 "continue_final_message", 
                 "tools", 
             }, 
         ), 
     ], 
 ) 
 def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwargs): 
 @pytest.mark.parametrize( 
     "model, expected_kwargs", 
     [ 
         ( 
             QWEN2VL_MODEL_ID, 
             { 
                 "add_vision_id", 
                 "add_generation_prompt", 
                 "continue_final_message", 
                 "tools", 
             }, 
         ), 
         ( 
             QWEN3_MODEL_ID, 
             { 
                 "enable_thinking", 
                 "add_generation_prompt", 
                 "continue_final_message", 
                 "tools", 
             }, 
         ), 
     ], 
 ) 
 def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwargs): 
     return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars}