diff --git a/docs/my-website/docs/reasoning_content.md b/docs/my-website/docs/reasoning_content.md index 05c374f38da..3693ab3315e 100644 --- a/docs/my-website/docs/reasoning_content.md +++ b/docs/my-website/docs/reasoning_content.md @@ -691,3 +691,53 @@ response = await litellm.anthropic.messages.acreate( ) # The summary="concise" is preserved when routing to OpenAI's Responses API ``` + +### Default Summary Injection for `/v1/messages` Adapter + +When the Anthropic `/v1/messages` adapter translates `thinking` parameters to OpenAI `reasoning_effort` for non-Claude models, `summary="detailed"` is automatically injected by default. This ensures that reasoning text is returned in the response (matching the Anthropic thinking behavior). + +To **disable** this default injection, use the `disable_default_reasoning_summary` flag: + + + + +```python +import litellm + +# Disable default summary="detailed" injection +litellm.disable_default_reasoning_summary = True + +response = await litellm.anthropic.messages.acreate( + model="openai/gpt-5.1", + messages=[{"role": "user", "content": "Hello"}], + max_tokens=8096, + thinking={"type": "enabled", "budget_tokens": 5000}, +) +# No summary will be injected — only reasoning_effort is forwarded +``` + + + + + +```bash +export LITELLM_DISABLE_DEFAULT_REASONING_SUMMARY=true +``` + + + + + +```yaml +litellm_settings: + disable_default_reasoning_summary: true +``` + + + + +:::info + +This flag only affects the automatic injection of `summary="detailed"` when no user-provided summary is present. If you explicitly pass `thinking.summary` (e.g., `"concise"` or `"auto"`), your value is always preserved regardless of this flag.
+ +::: diff --git a/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py b/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py index b0138ecbf5d..049b8763328 100644 --- a/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py +++ b/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py @@ -13,6 +13,10 @@ cast, ) +from litellm.llms.anthropic.experimental_pass_through.utils import ( + is_default_reasoning_summary_disabled, +) + # OpenAI has a 64-character limit for function/tool names # Anthropic does not have this limit, so we need to truncate long names OPENAI_MAX_TOOL_NAME_LENGTH = 64 @@ -694,8 +698,11 @@ def translate_thinking_for_model( ) if reasoning_effort: summary = thinking.get("summary") if isinstance(thinking, dict) else None + summary_disabled = is_default_reasoning_summary_disabled() if summary: return {"reasoning_effort": {"effort": reasoning_effort, "summary": summary}} + elif not summary_disabled: + return {"reasoning_effort": {"effort": reasoning_effort, "summary": "detailed"}} return {"reasoning_effort": reasoning_effort} return {} diff --git a/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_anthropic_experimental_pass_through_messages_handler.py b/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_anthropic_experimental_pass_through_messages_handler.py index 0bc28cc0869..203b6dacea7 100644 --- a/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_anthropic_experimental_pass_through_messages_handler.py +++ b/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_anthropic_experimental_pass_through_messages_handler.py @@ -221,9 +221,41 @@ def test_non_claude_model_converts_thinking_to_reasoning_effort(self): model="openai/gpt-5.2", ) - assert result == {"reasoning_effort": "minimal"} + assert result == {"reasoning_effort": {"effort": "minimal", "summary": "detailed"}} assert 
"thinking" not in result + def test_translate_thinking_for_model_no_summary_when_disabled(self): + """When disable_default_reasoning_summary is True, no summary is injected.""" + import litellm + from litellm.llms.anthropic.experimental_pass_through.adapters.transformation import ( + LiteLLMAnthropicMessagesAdapter, + ) + + original = litellm.disable_default_reasoning_summary + try: + litellm.disable_default_reasoning_summary = True + thinking = {"type": "enabled", "budget_tokens": 5000} + result = LiteLLMAnthropicMessagesAdapter.translate_thinking_for_model( + thinking=thinking, + model="openai/gpt-5.2", + ) + assert result == {"reasoning_effort": "medium"} + finally: + litellm.disable_default_reasoning_summary = original + + def test_translate_thinking_for_model_preserves_user_summary(self): + """User-provided summary is always preserved regardless of flag.""" + from litellm.llms.anthropic.experimental_pass_through.adapters.transformation import ( + LiteLLMAnthropicMessagesAdapter, + ) + + thinking = {"type": "enabled", "budget_tokens": 10000, "summary": "concise"} + result = LiteLLMAnthropicMessagesAdapter.translate_thinking_for_model( + thinking=thinking, + model="openai/gpt-5.2", + ) + assert result == {"reasoning_effort": {"effort": "high", "summary": "concise"}} + class TestThinkingSummaryPreservation: """Tests for thinking.summary preservation and disable_default_reasoning_summary flag."""