22 changes: 15 additions & 7 deletions pydantic_ai_slim/pydantic_ai/_agent_graph.py
@@ -434,9 +434,11 @@ async def _run_stream(  # noqa: C901
         if self._events_iterator is None:
             # Ensure that the stream is only run once

-            async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:
+            async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:  # noqa: C901
                 texts: list[str] = []
                 tool_calls: list[_messages.ToolCallPart] = []
+                thinking_parts: list[_messages.ThinkingPart] = []
+
                 for part in self.model_response.parts:
                     if isinstance(part, _messages.TextPart):
                         # ignore empty content for text parts, see #437
@@ -449,11 +451,7 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:
                     elif isinstance(part, _messages.BuiltinToolReturnPart):
                         yield _messages.BuiltinToolResultEvent(part)
                     elif isinstance(part, _messages.ThinkingPart):
-                        # We don't need to do anything with thinking parts in this tool-calling node.
-                        # We need to handle text parts in case there are no tool calls and/or the desired output comes
-                        # from the text, but thinking parts should not directly influence the execution of tools or
-                        # determination of the next node of graph execution here.
-                        pass
+                        thinking_parts.append(part)
                     else:
                         assert_never(part)

@@ -467,8 +465,18 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:
                 elif texts:
                     # No events are emitted during the handling of text responses, so we don't need to yield anything
                     self._next_node = await self._handle_text_response(ctx, texts)
+                elif thinking_parts:
+                    # handle thinking-only responses (responses that contain only ThinkingPart instances)
+                    # this can happen with models that support thinking mode when they don't provide
+                    # actionable output alongside their thinking content.
+                    self._next_node = ModelRequestNode[DepsT, NodeRunEndT](
+                        _messages.ModelRequest(
+                            parts=[_messages.RetryPromptPart('Responses without text or tool calls are not permitted.')]
+                        )
+                    )
                 else:
-                    # we've got an empty response, this sometimes happens with anthropic (and perhaps other models)
+                    # we got an empty response with no tool calls, text, or thinking
+                    # this sometimes happens with anthropic (and perhaps other models)
                     # when the model has already returned text alongside tool calls
                     # in this scenario, if text responses are allowed, we return text from the most recent model
                     # response, if any
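
Taken together, these hunks classify the parts of a model response into three buckets (text, tool calls, thinking) and treat a thinking-only response as retryable instead of silently ignoring it. Below is a minimal, self-contained sketch of that routing rule; the part classes and the route function are simplified stand-ins for illustration, not pydantic-ai's real types or code:

    # Illustrative sketch only; the real logic lives in _agent_graph.py above.
    from dataclasses import dataclass

    @dataclass
    class TextPart:
        content: str

    @dataclass
    class ToolCallPart:
        tool_name: str

    @dataclass
    class ThinkingPart:
        content: str

    def route(parts: list) -> str:
        texts = [p.content for p in parts if isinstance(p, TextPart) and p.content]
        tool_calls = [p for p in parts if isinstance(p, ToolCallPart)]
        thinking = [p for p in parts if isinstance(p, ThinkingPart)]
        if tool_calls:
            return 'execute tools'  # unchanged by this PR
        if texts:
            return 'handle text output'  # unchanged by this PR
        if thinking:
            # new: ask the model to retry rather than treating this as empty
            return 'retry: Responses without text or tool calls are not permitted.'
        return 'empty-response fallback'  # pre-existing handling

    assert route([ThinkingPart('Let me think...')]).startswith('retry')
    assert route([TextPart('Final answer')]) == 'handle text output'
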
68 changes: 68 additions & 0 deletions tests/test_agent.py
@@ -4087,3 +4087,71 @@ def bar() -> str:
     assert run.result.output == snapshot(Foo(a=0, b='a'))
     assert test_model.last_model_request_parameters is not None
     assert [t.name for t in test_model.last_model_request_parameters.function_tools] == snapshot(['bar'])
+
+
+async def test_thinking_only_response_retry():
+    """Test that thinking-only responses trigger a retry mechanism."""
+    from pydantic_ai.messages import ThinkingPart
+    from pydantic_ai.models.function import FunctionModel
+
+    call_count = 0
+
+    def model_function(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse:
+        nonlocal call_count
+        call_count += 1
+
+        if call_count == 1:
+            # First call: return thinking-only response
+            return ModelResponse(
+                parts=[ThinkingPart(content='Let me think about this...')],
+                model_name='thinking-test-model',
+            )
+        else:
+            # Second call: return proper response
+            return ModelResponse(
+                parts=[TextPart(content='Final answer')],
+                model_name='thinking-test-model',
+            )
+
+    model = FunctionModel(model_function)
+    agent = Agent(model, system_prompt='You are a helpful assistant.')
+
+    result = await agent.run('Hello')
+
+    assert result.all_messages() == snapshot(
+        [
+            ModelRequest(
+                parts=[
+                    SystemPromptPart(
+                        content='You are a helpful assistant.',
+                        timestamp=IsDatetime(),
+                    ),
+                    UserPromptPart(
+                        content='Hello',
+                        timestamp=IsDatetime(),
+                    ),
+                ]
+            ),
+            ModelResponse(
+                parts=[ThinkingPart(content='Let me think about this...')],
+                usage=Usage(requests=1, request_tokens=57, response_tokens=6, total_tokens=63),
+                model_name='function:model_function:',
+                timestamp=IsDatetime(),
+            ),
+            ModelRequest(
+                parts=[
+                    RetryPromptPart(
+                        content='Responses without text or tool calls are not permitted.',
+                        tool_call_id=IsStr(),
+                        timestamp=IsDatetime(),
+                    )
+                ]
+            ),
+            ModelResponse(
+                parts=[TextPart(content='Final answer')],
+                usage=Usage(requests=1, request_tokens=75, response_tokens=8, total_tokens=83),
+                model_name='function:model_function:',
+                timestamp=IsDatetime(),
+            ),
+        ]
+    )