-
-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[Bugfix]: Prevent reasoning_content leak #32997
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -139,3 +139,84 @@ async def test_chat_full_of_tool_and_reasoning(client: openai.AsyncOpenAI): | |
| assert len(tool_calls.choices[0].message.reasoning) > 0 | ||
| assert tool_calls.choices[0].message.tool_calls[0].function.name == FUNC_NAME | ||
| assert tool_calls.choices[0].message.tool_calls[0].function.arguments == FUNC_ARGS | ||
|
|
||
|
|
||
| # test that content does not leak into final chunk when finish_reason=tool_calls | ||
| @pytest.mark.asyncio | ||
| async def test_no_content_leak_when_finish_reason_tool_calls( | ||
| client: openai.AsyncOpenAI, | ||
| ): | ||
| """ | ||
| Test that when finish_reason='tool_calls', the final chunk does not | ||
| contain any content field. This prevents reasoning_content from leaking | ||
| into content, which violates OpenAI's schema contract. | ||
|
|
||
| This test specifically targets the bug where leftover reasoning buffers | ||
| (especially from speculative decoding) were incorrectly flushed into | ||
| the content field in the final streamed chunk. | ||
| """ | ||
| stream = await client.chat.completions.create( | ||
| model=MODEL_NAME, | ||
| messages=MESSAGES, | ||
| tools=TOOLS, | ||
| temperature=0.0, | ||
| stream=True, | ||
| tool_choice="auto", | ||
| include_reasoning=True, | ||
| ) | ||
|
|
||
| chunks = [] | ||
| final_chunk = None | ||
| async for chunk in stream: | ||
| chunks.append(chunk) | ||
| # Track the final chunk with finish_reason | ||
| if chunk.choices and chunk.choices[0].finish_reason: | ||
| final_chunk = chunk | ||
|
|
||
| # Ensure we got a final chunk with tool_calls | ||
| assert final_chunk is not None, "Expected a final chunk with finish_reason" | ||
| assert final_chunk.choices[0].finish_reason == "tool_calls", ( | ||
| "Expected finish_reason to be 'tool_calls'" | ||
| ) | ||
|
|
||
| delta = final_chunk.choices[0].delta | ||
|
|
||
| # Per OpenAI spec, when finish_reason='tool_calls', content must be null/absent | ||
| # This is the core fix: prevent reasoning_content from leaking into content | ||
| assert delta.content is None or delta.content == "", ( | ||
| f"Final chunk with finish_reason='tool_calls' must not have content. " | ||
| f"Got content='{delta.content}'. This indicates reasoning_content leaked " | ||
| f"into content field." | ||
| ) | ||
|
|
||
| # Also ensure reasoning fields are not present in final chunk | ||
| # (they should only appear in earlier chunks) | ||
| reasoning = getattr(delta, "reasoning", None) | ||
| reasoning_content = getattr(delta, "reasoning_content", None) | ||
| assert reasoning is None or reasoning == "", ( | ||
| "Final chunk with tool_calls should not have reasoning field" | ||
| ) | ||
| assert reasoning_content is None or reasoning_content == "", ( | ||
| "Final chunk with tool_calls should not have reasoning_content field" | ||
| ) | ||
|
|
||
| # Verify tool_calls are present (the expected behavior) | ||
| assert delta.tool_calls is not None and len(delta.tool_calls) > 0, ( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AssertionError: Final chunk with finish_reason='tool_calls' must have tool_calls I ran this test on your branch and got the error shown above. |
||
| "Final chunk with finish_reason='tool_calls' must have tool_calls" | ||
| ) | ||
|
|
||
| # Verify reasoning was streamed in earlier chunks (not in final) | ||
| reasoning_found_in_earlier_chunks = False | ||
| for chunk in chunks[:-1]: # All chunks except the final one | ||
| if chunk.choices: | ||
| delta = chunk.choices[0].delta | ||
| if hasattr(delta, "reasoning") and delta.reasoning: | ||
| reasoning_found_in_earlier_chunks = True | ||
| break | ||
| if hasattr(delta, "reasoning_content") and delta.reasoning_content: | ||
| reasoning_found_in_earlier_chunks = True | ||
| break | ||
|
|
||
| assert reasoning_found_in_earlier_chunks, ( | ||
| "Reasoning should be streamed in earlier chunks, not in final chunk" | ||
| ) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -911,6 +911,13 @@ async def chat_completion_stream_generator( | |
| ) | ||
| ) | ||
| harmony_tools_streamed[i] |= tools_streamed_flag | ||
| # Ensure no content leaks when tool calls are present | ||
| # in harmony path. Per OpenAI spec, tool call deltas | ||
| # must not contain content. | ||
| if tools_streamed_flag and delta_message: | ||
| delta_message.content = None | ||
RohanDisa marked this conversation as resolved.
Show resolved
Hide resolved
RohanDisa marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| delta_message.reasoning = None | ||
| delta_message.reasoning_content = None | ||
| # handle streaming deltas for tools with named tool_choice | ||
| elif tool_choice_function_name: | ||
| if ( | ||
|
|
@@ -980,6 +987,7 @@ async def chat_completion_stream_generator( | |
| ] | ||
| ) | ||
| tools_streamed[i] = True | ||
| self._clear_non_tool_call_fields(delta_message) | ||
|
|
||
| elif request.tool_choice == "required": | ||
| assert previous_texts is not None | ||
|
|
@@ -1036,6 +1044,7 @@ async def chat_completion_stream_generator( | |
| ): | ||
| history_tool_call_cnt += 1 | ||
| tools_streamed[i] = True | ||
| self._clear_non_tool_call_fields(delta_message) | ||
|
|
||
| # handle streaming deltas for tools with "auto" tool choice | ||
| # and reasoning parser | ||
|
|
@@ -1111,6 +1120,7 @@ async def chat_completion_stream_generator( | |
| ) | ||
| if delta_message and delta_message.tool_calls: | ||
| tools_streamed[i] = True | ||
| self._clear_non_tool_call_fields(delta_message) | ||
| # when only tool calls | ||
| elif tool_choice_auto: | ||
| assert tool_parser is not None | ||
|
|
@@ -1125,6 +1135,7 @@ async def chat_completion_stream_generator( | |
| ) | ||
| if delta_message and delta_message.tool_calls: | ||
| tools_streamed[i] = True | ||
| self._clear_non_tool_call_fields(delta_message) | ||
|
|
||
| # when only reasoning | ||
| elif self.reasoning_parser: | ||
|
|
@@ -1290,6 +1301,18 @@ async def chat_completion_stream_generator( | |
| finish_reason_ = ( | ||
| output.finish_reason if output.finish_reason else "stop" | ||
| ) | ||
|
|
||
| # When finish_reason is "tool_calls", ensure no content | ||
| # or reasoning fields leak into the final delta. | ||
| # Per OpenAI spec, tool call responses must only contain | ||
| # tool_calls and finish_reason, never content. | ||
| if finish_reason_ == "tool_calls": | ||
| if delta_message is None: | ||
| # Create empty delta message if none exists | ||
| delta_message = DeltaMessage() | ||
| else: | ||
| self._clear_non_tool_call_fields(delta_message) | ||
|
|
||
| choice_data = ChatCompletionResponseStreamChoice( | ||
| index=i, | ||
| delta=delta_message, | ||
|
|
@@ -1824,6 +1847,18 @@ def _create_chat_logprobs( | |
|
|
||
| return ChatCompletionLogProbs(content=logprobs_content) | ||
|
|
||
| def _clear_non_tool_call_fields(self, delta_message: DeltaMessage) -> None: | ||
| """ | ||
| Clear content and reasoning fields from a delta message. | ||
|
|
||
| Per OpenAI spec, tool call deltas must not contain content or reasoning | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Could you share the link? I couldn’t find any similar documentation or explanation. |
||
| fields. This prevents leakage from reasoning buffers or speculative | ||
| decoding when tool calls are present. | ||
| """ | ||
| delta_message.content = None | ||
| delta_message.reasoning = None | ||
| delta_message.reasoning_content = None | ||
|
|
||
| def _should_stream_with_auto_tool_parsing(self, request: ChatCompletionRequest): | ||
| """ | ||
| Utility function to check if streamed tokens should go through the tool | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
got an unexpected keyword argument 'include_reasoning'