From 8332c67d9b9818bd9e7c79460fd0aa70b108fbf0 Mon Sep 17 00:00:00 2001 From: cjackal <44624812+cjackal@users.noreply.github.com> Date: Wed, 31 Dec 2025 15:12:59 +0000 Subject: [PATCH 1/3] respect server-level default chat template kwargs in reasoning parser Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com> --- vllm/entrypoints/openai/serving_chat.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 32a3cf04951e..f4db779a6402 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -659,9 +659,13 @@ async def chat_completion_stream_generator( "Tokenizer not available when `skip_tokenizer_init=True`" ) + # Pass the same chat template kwargs as used in tokenization + chat_template_kwargs = self.default_chat_template_kwargs | ( + request.chat_template_kwargs or {} + ) reasoning_parser = self.reasoning_parser( tokenizer, - chat_template_kwargs=request.chat_template_kwargs, # type: ignore + chat_template_kwargs=chat_template_kwargs, # type: ignore[call-arg] ) except RuntimeError as e: logger.exception("Error in reasoning parser creation.") @@ -1437,9 +1441,13 @@ async def chat_completion_full_generator( "Tokenizer not available when `skip_tokenizer_init=True`" ) + # Pass the same chat template kwargs as used in tokenization + chat_template_kwargs = self.default_chat_template_kwargs | ( + request.chat_template_kwargs or {} + ) reasoning_parser = self.reasoning_parser( tokenizer, - chat_template_kwargs=request.chat_template_kwargs, # type: ignore + chat_template_kwargs=chat_template_kwargs, # type: ignore[call-arg] ) except RuntimeError as e: logger.exception("Error in reasoning parser creation.") From 310e1c4374dd13b06f0d5a51a49c161dc799ebed Mon Sep 17 00:00:00 2001 From: cjackal <44624812+cjackal@users.noreply.github.com> Date: Thu, 1 Jan 2026 03:06:05 +0000 Subject: [PATCH 2/3] refactor following gemini comment Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com> --- vllm/entrypoints/openai/serving_chat.py | 10 ++++++---- vllm/entrypoints/openai/serving_engine.py | 19 ++++++++++++++++--- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index f4db779a6402..5a916f39b128 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -660,8 +660,9 @@ async def chat_completion_stream_generator( ) # Pass the same chat template kwargs as used in tokenization - chat_template_kwargs = self.default_chat_template_kwargs | ( - request.chat_template_kwargs or {} + chat_template_kwargs = self._prepare_extra_chat_template_kwargs( + request.chat_template_kwargs, + self.default_chat_template_kwargs, ) reasoning_parser = self.reasoning_parser( tokenizer, @@ -1442,8 +1443,9 @@ async def chat_completion_full_generator( ) # Pass the same chat template kwargs as used in tokenization - chat_template_kwargs = self.default_chat_template_kwargs | ( - request.chat_template_kwargs or {} + chat_template_kwargs = self._prepare_extra_chat_template_kwargs( + request.chat_template_kwargs, + self.default_chat_template_kwargs, ) reasoning_parser = self.reasoning_parser( tokenizer, diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index f1c4ab63f05b..27cfe4286247 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -1146,6 +1146,18 @@ def _validate_chat_template( ) return None + def _prepare_extra_chat_template_kwargs( + self, + request_chat_template_kwargs: dict[str, Any] | None = None, + default_chat_template_kwargs: dict[str, Any] | None = None, + ) -> dict[str, Any]: + """Helper to merge server-default and request-specific chat template kwargs.""" + request_chat_template_kwargs = request_chat_template_kwargs or {} + if default_chat_template_kwargs is None: + return request_chat_template_kwargs + # Apply server defaults first, then request kwargs override. + return default_chat_template_kwargs | request_chat_template_kwargs + async def _preprocess_chat( self, request: ChatLikeRequest | ResponsesRequest, @@ -1184,9 +1196,10 @@ async def _preprocess_chat( tools=tool_dicts, documents=documents, ) - if default_chat_template_kwargs: - _chat_template_kwargs.update(default_chat_template_kwargs) - _chat_template_kwargs.update(chat_template_kwargs or {}) + _chat_template_kwargs |= self._prepare_extra_chat_template_kwargs( + chat_template_kwargs, + default_chat_template_kwargs, + ) request_prompt: str | list[int] From 9a72ac9615b9c6797823a758fcd3d5185dc2d7b3 Mon Sep 17 00:00:00 2001 From: cjackal <44624812+cjackal@users.noreply.github.com> Date: Sun, 4 Jan 2026 11:31:02 +0900 Subject: [PATCH 3/3] Update vllm/entrypoints/openai/serving_engine.py Co-authored-by: Chauncey Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com> --- vllm/entrypoints/openai/serving_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 27cfe4286247..e65141edd144 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -1146,8 +1146,8 @@ def _validate_chat_template( ) return None + @staticmethod def _prepare_extra_chat_template_kwargs( - self, request_chat_template_kwargs: dict[str, Any] | None = None, default_chat_template_kwargs: dict[str, Any] | None = None, ) -> dict[str, Any]: