diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py
index 39f8635bf297..9de8337afab4 100644
--- a/vllm/entrypoints/openai/chat_completion/serving.py
+++ b/vllm/entrypoints/openai/chat_completion/serving.py
@@ -1512,17 +1512,7 @@ async def chat_completion_full_generator(
             tool_call_class = (
                 MistralToolCall if is_mistral_tokenizer(tokenizer) else ToolCall
             )
-            if self.use_harmony:
-                # Harmony models already have parsed content and tool_calls
-                # through parse_chat_output. Respect its output directly.
-                message = ChatMessage(
-                    role=role,
-                    reasoning=reasoning,
-                    content=content,
-                    tool_calls=tool_calls if tool_calls else [],
-                )
-
-            elif (not self.enable_auto_tools or not self.tool_parser) and (
+            if (not self.enable_auto_tools or not self.tool_parser) and (
                 not isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam)
                 and request.tool_choice != "required"
             ):