22 changes: 15 additions & 7 deletions pydantic_ai_slim/pydantic_ai/_agent_graph.py
@@ -434,9 +434,11 @@ async def _run_stream(  # noqa: C901
         if self._events_iterator is None:
             # Ensure that the stream is only run once

-            async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:
+            async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:  # noqa: C901
                 texts: list[str] = []
                 tool_calls: list[_messages.ToolCallPart] = []
+                thinking_parts: list[_messages.ThinkingPart] = []
+
                 for part in self.model_response.parts:
                     if isinstance(part, _messages.TextPart):
                         # ignore empty content for text parts, see #437
@@ -449,11 +451,7 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:
                     elif isinstance(part, _messages.BuiltinToolReturnPart):
                         yield _messages.BuiltinToolResultEvent(part)
                     elif isinstance(part, _messages.ThinkingPart):
-                        # We don't need to do anything with thinking parts in this tool-calling node.
-                        # We need to handle text parts in case there are no tool calls and/or the desired output comes
-                        # from the text, but thinking parts should not directly influence the execution of tools or
-                        # determination of the next node of graph execution here.
-                        pass
+                        thinking_parts.append(part)
                     else:
                         assert_never(part)

@@ -467,8 +465,18 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:
                 elif texts:
                     # No events are emitted during the handling of text responses, so we don't need to yield anything
                     self._next_node = await self._handle_text_response(ctx, texts)
+                elif thinking_parts:
+                    # handle thinking-only responses (responses that contain only ThinkingPart instances)
+                    # this can happen with models that support thinking mode when they don't provide
+                    # actionable output alongside their thinking content.
+                    self._next_node = ModelRequestNode[DepsT, NodeRunEndT](
+                        _messages.ModelRequest(
+                            parts=[_messages.RetryPromptPart('Responses without text or tool calls are not permitted.')]
+                        )
+                    )
                 else:
-                    # we've got an empty response, this sometimes happens with anthropic (and perhaps other models)
+                    # we got an empty response with no tool calls, text, or thinking
+                    # this sometimes happens with anthropic (and perhaps other models)
                     # when the model has already returned text alongside tool calls
                     # in this scenario, if text responses are allowed, we return text from the most recent model
                     # response, if any
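
Taken together, these hunks classify the parts of a model response into three buckets (text, tool calls, thinking) and treat a thinking-only response as retryable instead of silently ignoring it. Below is a minimal, self-contained sketch of that routing rule; the part classes and the route function are simplified stand-ins for illustration, not pydantic-ai's real types or code:

    # Illustrative sketch only; the real logic lives in _agent_graph.py above.
    from dataclasses import dataclass

    @dataclass
    class TextPart:
        content: str

    @dataclass
    class ToolCallPart:
        tool_name: str

    @dataclass
    class ThinkingPart:
        content: str

    def route(parts: list) -> str:
        texts = [p.content for p in parts if isinstance(p, TextPart) and p.content]
        tool_calls = [p for p in parts if isinstance(p, ToolCallPart)]
        thinking = [p for p in parts if isinstance(p, ThinkingPart)]
        if tool_calls:
            return 'execute tools'  # unchanged by this PR
        if texts:
            return 'handle text output'  # unchanged by this PR
        if thinking:
            # new: ask the model to retry rather than treating this as empty
            return 'retry: Responses without text or tool calls are not permitted.'
        return 'empty-response fallback'  # pre-existing handling

    assert route([ThinkingPart('Let me think...')]).startswith('retry')
    assert route([TextPart('Final answer')]) == 'handle text output'
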
68 changes: 68 additions & 0 deletions tests/test_agent.py
@@ -4087,3 +4087,71 @@ def bar() -> str:
     assert run.result.output == snapshot(Foo(a=0, b='a'))
     assert test_model.last_model_request_parameters is not None
     assert [t.name for t in test_model.last_model_request_parameters.function_tools] == snapshot(['bar'])
+
+
+async def test_thinking_only_response_retry():
+    """Test that thinking-only responses trigger a retry mechanism."""
+    from pydantic_ai.messages import ThinkingPart
+    from pydantic_ai.models.function import FunctionModel
+
+    call_count = 0
+
+    def model_function(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse:
+        nonlocal call_count
+        call_count += 1
+
+        if call_count == 1:
+            # First call: return thinking-only response
+            return ModelResponse(
+                parts=[ThinkingPart(content='Let me think about this...')],
+                model_name='thinking-test-model',
+            )
+        else:
+            # Second call: return proper response
+            return ModelResponse(
+                parts=[TextPart(content='Final answer')],
+                model_name='thinking-test-model',
+            )
+
+    model = FunctionModel(model_function)
+    agent = Agent(model, system_prompt='You are a helpful assistant.')
+
+    result = await agent.run('Hello')
+
+    assert result.all_messages() == snapshot(
+        [
+            ModelRequest(
+                parts=[
+                    SystemPromptPart(
+                        content='You are a helpful assistant.',
+                        timestamp=IsDatetime(),
+                    ),
+                    UserPromptPart(
+                        content='Hello',
+                        timestamp=IsDatetime(),
+                    ),
+                ]
+            ),
+            ModelResponse(
+                parts=[ThinkingPart(content='Let me think about this...')],
+                usage=Usage(requests=1, request_tokens=57, response_tokens=6, total_tokens=63),
+                model_name='function:model_function:',
+                timestamp=IsDatetime(),
+            ),
+            ModelRequest(
+                parts=[
+                    RetryPromptPart(
+                        content='Responses without text or tool calls are not permitted.',
+                        tool_call_id=IsStr(),
+                        timestamp=IsDatetime(),
+                    )
+                ]
+            ),
+            ModelResponse(
+                parts=[TextPart(content='Final answer')],
+                usage=Usage(requests=1, request_tokens=75, response_tokens=8, total_tokens=83),
+                model_name='function:model_function:',
+                timestamp=IsDatetime(),
+            ),
+        ]
+    )