diff --git a/docs/my-website/blog/claude_opus_4_6/index.md b/docs/my-website/blog/claude_opus_4_6/index.md
index b1836cfaef5..75b088c533d 100644
--- a/docs/my-website/blog/claude_opus_4_6/index.md
+++ b/docs/my-website/blog/claude_opus_4_6/index.md
@@ -3,6 +3,10 @@ slug: claude_opus_4_6
title: "Day 0 Support: Claude Opus 4.6"
date: 2026-02-05T10:00:00
authors:
+ - name: Sameer Kankute
+ title: SWE @ LiteLLM (LLM Translation)
+ url: https://www.linkedin.com/in/sameer-kankute/
+ image_url: https://pbs.twimg.com/profile_images/2001352686994907136/ONgNuSk5_400x400.jpg
- name: Ishaan Jaff
title: "CTO, LiteLLM"
url: https://www.linkedin.com/in/reffajnaahsi/
@@ -219,6 +223,156 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
-## More Features Coming Soon
+## Compaction
+
+LiteLLM supports enabling compaction for the new claude-opus-4-6 model.
+
+### Enabling Compaction
+
+To enable compaction, add the `context_management` parameter with the `compact_20260112` edit type:
+
+```bash
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+--header 'Content-Type: application/json' \
+--header 'Authorization: Bearer $LITELLM_KEY' \
+--data '{
+ "model": "claude-opus-4-6",
+ "messages": [
+ {
+ "role": "user",
+ "content": "What is the weather in San Francisco?"
+ }
+ ],
+ "context_management": {
+ "edits": [
+ {
+ "type": "compact_20260112"
+ }
+ ]
+ },
+ "max_tokens": 100
+}'
+```
+All `context_management` parameters supported by Anthropic can be passed through directly. LiteLLM automatically adds the `compact-2026-01-12` beta header to the request.
+
+
+### Response with Compaction Block
+
+The response will include the compaction summary in `provider_specific_fields.compaction_blocks`:
+
+```json
+{
+ "id": "chatcmpl-a6c105a3-4b25-419e-9551-c800633b6cb2",
+ "created": 1770357619,
+ "model": "claude-opus-4-6",
+ "object": "chat.completion",
+ "choices": [
+ {
+ "finish_reason": "length",
+ "index": 0,
+ "message": {
+ "content": "I don't have access to real-time data, so I can't provide the current weather in San Francisco. To get up-to-date weather information, I'd recommend checking:\n\n- **Weather websites** like weather.com, accuweather.com, or wunderground.com\n- **Search engines** – just Google \"San Francisco weather\"\n- **Weather apps** on your phone (e.g., Apple Weather, Google Weather)\n- **National",
+ "role": "assistant",
+ "provider_specific_fields": {
+ "compaction_blocks": [
+ {
+ "type": "compaction",
+ "content": "Summary of the conversation: The user requested help building a web scraper..."
+ }
+ ]
+ }
+ }
+ }
+ ],
+ "usage": {
+ "completion_tokens": 100,
+ "prompt_tokens": 86,
+ "total_tokens": 186
+ }
+}
+```
+
+### Using Compaction Blocks in Follow-up Requests
+
+To continue the conversation with compaction, include the compaction block in the assistant message's `provider_specific_fields`:
+
+```bash
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+--header 'Content-Type: application/json' \
+--header 'Authorization: Bearer $LITELLM_KEY' \
+--data '{
+ "model": "claude-opus-4-6",
+ "messages": [
+ {
+ "role": "user",
+ "content": "How can I build a web scraper?"
+ },
+ {
+ "role": "assistant",
+ "content": [
+ {
+ "type": "text",
+ "text": "Certainly! To build a basic web scraper, you'll typically use a programming language like Python along with libraries such as `requests` (for fetching web pages) and `BeautifulSoup` (for parsing HTML). Here's a basic example:\n\n```python\nimport requests\nfrom bs4 import BeautifulSoup\n\nurl = 'https://example.com'\nresponse = requests.get(url)\nsoup = BeautifulSoup(response.text, 'html.parser')\n\n# Extract and print all text\ntext = soup.get_text()\nprint(text)\n```\n\nLet me know what you're interested in scraping or if you need help with a specific website!"
+ }
+ ],
+ "provider_specific_fields": {
+ "compaction_blocks": [
+ {
+ "type": "compaction",
+ "content": "Summary of the conversation: The user asked how to build a web scraper, and the assistant gave an overview using Python with requests and BeautifulSoup."
+ }
+ ]
+ }
+ },
+ {
+ "role": "user",
+ "content": "How do I use it to scrape product prices?"
+ }
+ ],
+ "context_management": {
+ "edits": [
+ {
+ "type": "compact_20260112"
+ }
+ ]
+ },
+ "max_tokens": 100
+}'
+```
+
+### Streaming Support
+
+Compaction blocks are also supported in streaming mode. You'll receive:
+- `compaction_start` event when a compaction block begins
+- `compaction_delta` events with the compaction content
+- The accumulated `compaction_blocks` in `provider_specific_fields`
+
+
+## Effort Levels
+
+Four effort levels available: `low`, `medium`, `high` (default), and `max`. Pass directly via the `effort` parameter:
+
+```bash
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+--header 'Content-Type: application/json' \
+--header 'Authorization: Bearer $LITELLM_KEY' \
+--data '{
+ "model": "claude-opus-4-6",
+ "messages": [
+ {
+ "role": "user",
+ "content": "Explain quantum computing"
+ }
+ ],
+ "effort": "max"
+}'
+```
+
+## 1M Token Context (Beta)
+
+Opus 4.6 supports 1M token context. Premium pricing applies for prompts exceeding 200k tokens ($10/$37.50 per million input/output tokens). LiteLLM supports cost calculations for 1M token contexts.
+
+## US-Only Inference
+
+Available at 1.1× token pricing. LiteLLM supports this pricing model.
-We're actively working on supporting new features for Claude Opus 4.6. Stay tuned for updates!
diff --git a/docs/my-website/docs/tutorials/claude_code_beta_headers.md b/docs/my-website/docs/tutorials/claude_code_beta_headers.md
new file mode 100644
index 00000000000..9c1645e0277
--- /dev/null
+++ b/docs/my-website/docs/tutorials/claude_code_beta_headers.md
@@ -0,0 +1,129 @@
+import Image from '@theme/IdealImage';
+
+# Claude Code - Fixing Invalid Beta Header Errors
+
+When using Claude Code with LiteLLM and non-Anthropic providers (Bedrock, Azure AI, Vertex AI), you may encounter "invalid beta header" errors. This guide explains how to fix these errors locally or contribute a fix to LiteLLM.
+
+## What Are Beta Headers?
+
+Anthropic uses beta headers to enable experimental features in Claude. When you use Claude Code, it may send beta headers like:
+
+```
+anthropic-beta: prompt-caching-scope-2026-01-05,advanced-tool-use-2025-11-20
+```
+
+However, not all providers support all Anthropic beta features. When an unsupported beta header is sent to a provider, you'll see an error.
+
+## Common Error Message
+
+```bash
+Error: The model returned the following errors: invalid beta flag
+```
+
+## How LiteLLM Handles Beta Headers
+
+LiteLLM automatically filters out unsupported beta headers using a configuration file:
+
+```
+litellm/anthropic_beta_headers_config.json
+```
+
+This JSON file lists which beta headers are **unsupported** for each provider. Headers not in the unsupported list are passed through to the provider.
+
+## Quick Fix: Update Config Locally
+
+If you encounter an invalid beta header error, you can fix it immediately by updating the config file locally.
+
+### Step 1: Locate the Config File
+
+Find the file in your LiteLLM installation:
+
+```bash
+# If installed via pip
+cd $(python -c "import litellm; import os; print(os.path.dirname(litellm.__file__))")
+
+# The config file is at:
+# litellm/anthropic_beta_headers_config.json
+```
+
+### Step 2: Add the Unsupported Header
+
+Open `anthropic_beta_headers_config.json` and add the problematic header to the appropriate provider's list:
+
+```json title="anthropic_beta_headers_config.json"
+{
+ "description": "Unsupported Anthropic beta headers for each provider. Headers listed here will be dropped. Headers not listed are passed through as-is.",
+ "anthropic": [],
+ "azure_ai": [],
+ "bedrock_converse": [
+ "prompt-caching-scope-2026-01-05",
+ "bash_20250124",
+ "bash_20241022",
+ "text_editor_20250124",
+ "text_editor_20241022",
+ "compact-2026-01-12",
+ "advanced-tool-use-2025-11-20",
+ "web-fetch-2025-09-10",
+ "code-execution-2025-08-25",
+ "skills-2025-10-02",
+ "files-api-2025-04-14"
+ ],
+ "bedrock": [
+ "advanced-tool-use-2025-11-20",
+ "prompt-caching-scope-2026-01-05",
+ "structured-outputs-2025-11-13",
+ "web-fetch-2025-09-10",
+ "code-execution-2025-08-25",
+ "skills-2025-10-02",
+ "files-api-2025-04-14"
+ ],
+ "vertex_ai": [
+ "prompt-caching-scope-2026-01-05"
+ ]
+}
+```
+
+### Step 3: Restart Your Application
+
+After updating the config file, restart your LiteLLM proxy or application:
+
+```bash
+# If using LiteLLM proxy
+litellm --config config.yaml
+
+# If using Python SDK
+# Just restart your Python application
+```
+
+The updated configuration will be loaded automatically.
+
+## Contributing a Fix to LiteLLM
+
+Help the community by contributing your fix! If your local changes work, please open a PR adding the header to the config file, and we will review and merge it.
+
+
+## How Beta Header Filtering Works
+
+When you make a request through LiteLLM:
+
+```mermaid
+sequenceDiagram
+ participant CC as Claude Code
+ participant LP as LiteLLM
+ participant Config as Beta Headers Config
+ participant Provider as Provider (Bedrock/Azure/etc)
+
+ CC->>LP: Request with beta headers
+ Note over CC,LP: anthropic-beta: header1,header2,header3
+
+ LP->>Config: Load unsupported headers for provider
+ Config-->>LP: Returns unsupported list
+
+    Note over LP: Filter headers:<br/>- Remove unsupported<br/>- Keep supported
+
+    LP->>Provider: Request with filtered headers
+    Note over LP,Provider: anthropic-beta: header2<br/>(header1, header3 removed)
+
+ Provider-->>LP: Success response
+ LP-->>CC: Response
+```
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index fda0e3be4e4..688ad714370 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -129,6 +129,7 @@ const sidebars = {
"tutorials/claude_mcp",
"tutorials/claude_non_anthropic_models",
"tutorials/claude_code_plugin_marketplace",
+ "tutorials/claude_code_beta_headers",
]
},
"tutorials/opencode_integration",
diff --git a/litellm/anthropic_beta_headers_config.json b/litellm/anthropic_beta_headers_config.json
new file mode 100644
index 00000000000..193091c0176
--- /dev/null
+++ b/litellm/anthropic_beta_headers_config.json
@@ -0,0 +1,30 @@
+{
+ "description": "Unsupported Anthropic beta headers for each provider. Headers listed here will be dropped. Headers not listed are passed through as-is.",
+ "anthropic": [],
+ "azure_ai": [],
+ "bedrock_converse": [
+ "prompt-caching-scope-2026-01-05",
+ "bash_20250124",
+ "bash_20241022",
+ "text_editor_20250124",
+ "text_editor_20241022",
+ "compact-2026-01-12",
+ "advanced-tool-use-2025-11-20",
+ "web-fetch-2025-09-10",
+ "code-execution-2025-08-25",
+ "skills-2025-10-02",
+ "files-api-2025-04-14"
+ ],
+ "bedrock": [
+ "advanced-tool-use-2025-11-20",
+ "prompt-caching-scope-2026-01-05",
+ "structured-outputs-2025-11-13",
+ "web-fetch-2025-09-10",
+ "code-execution-2025-08-25",
+ "skills-2025-10-02",
+ "files-api-2025-04-14"
+ ],
+ "vertex_ai": [
+ "prompt-caching-scope-2026-01-05"
+ ]
+}
diff --git a/litellm/anthropic_beta_headers_manager.py b/litellm/anthropic_beta_headers_manager.py
new file mode 100644
index 00000000000..2643f4c03fa
--- /dev/null
+++ b/litellm/anthropic_beta_headers_manager.py
@@ -0,0 +1,221 @@
+"""
+Centralized manager for Anthropic beta headers across different providers.
+
+This module provides utilities to:
+1. Load beta header configuration from JSON (lists unsupported headers per provider)
+2. Filter out unsupported beta headers
+3. Handle provider-specific header name mappings (e.g., advanced-tool-use -> tool-search-tool)
+
+Design:
+- JSON config lists UNSUPPORTED headers for each provider
+- Headers not in the unsupported list are passed through
+- Header mappings allow renaming headers for specific providers
+"""
+
+import json
+import os
+from typing import Dict, List, Optional, Set
+
+from litellm.litellm_core_utils.litellm_logging import verbose_logger
+
+# Cache for the loaded configuration
+_BETA_HEADERS_CONFIG: Optional[Dict] = None
+
+
+def _load_beta_headers_config() -> Dict:
+ """
+ Load the beta headers configuration from JSON file.
+ Uses caching to avoid repeated file reads.
+
+ Returns:
+ Dict containing the beta headers configuration
+ """
+ global _BETA_HEADERS_CONFIG
+
+ if _BETA_HEADERS_CONFIG is not None:
+ return _BETA_HEADERS_CONFIG
+
+ config_path = os.path.join(
+ os.path.dirname(__file__),
+ "anthropic_beta_headers_config.json"
+ )
+
+ try:
+ with open(config_path, "r") as f:
+ _BETA_HEADERS_CONFIG = json.load(f)
+ verbose_logger.debug(f"Loaded beta headers config from {config_path}")
+ return _BETA_HEADERS_CONFIG
+ except Exception as e:
+ verbose_logger.error(f"Failed to load beta headers config: {e}")
+ # Return empty config as fallback
+ return {
+ "anthropic": [],
+ "azure_ai": [],
+ "bedrock": [],
+ "bedrock_converse": [],
+ "vertex_ai": []
+ }
+
+
+def get_provider_name(provider: str) -> str:
+ """
+ Resolve provider aliases to canonical provider names.
+
+ Args:
+ provider: Provider name (may be an alias)
+
+ Returns:
+ Canonical provider name
+ """
+ config = _load_beta_headers_config()
+ aliases = config.get("provider_aliases", {})
+ return aliases.get(provider, provider)
+
+
+def filter_and_transform_beta_headers(
+ beta_headers: List[str],
+ provider: str,
+) -> List[str]:
+ """
+ Filter beta headers based on provider's unsupported list.
+
+ This function:
+ 1. Removes headers that are in the provider's unsupported list
+ 2. Passes through all other headers as-is
+
+ Note: Header transformations/mappings (e.g., advanced-tool-use -> tool-search-tool)
+ are handled in each provider's transformation code, not here.
+
+ Args:
+ beta_headers: List of Anthropic beta header values
+ provider: Provider name (e.g., "anthropic", "bedrock", "vertex_ai")
+
+ Returns:
+ List of filtered beta headers for the provider
+ """
+ if not beta_headers:
+ return []
+
+ config = _load_beta_headers_config()
+ provider = get_provider_name(provider)
+
+ # Get unsupported headers for this provider
+ unsupported_headers = set(config.get(provider, []))
+
+ filtered_headers: Set[str] = set()
+
+ for header in beta_headers:
+ header = header.strip()
+
+ # Skip if header is unsupported
+ if header in unsupported_headers:
+ verbose_logger.debug(
+ f"Dropping unsupported beta header '{header}' for provider '{provider}'"
+ )
+ continue
+
+ # Pass through as-is
+ filtered_headers.add(header)
+
+ return sorted(list(filtered_headers))
+
+
+def is_beta_header_supported(
+ beta_header: str,
+ provider: str,
+) -> bool:
+ """
+ Check if a specific beta header is supported by a provider.
+
+ Args:
+ beta_header: The Anthropic beta header value
+ provider: Provider name
+
+ Returns:
+ True if the header is supported (not in unsupported list), False otherwise
+ """
+ config = _load_beta_headers_config()
+ provider = get_provider_name(provider)
+ unsupported_headers = set(config.get(provider, []))
+ return beta_header not in unsupported_headers
+
+
+def get_provider_beta_header(
+ anthropic_beta_header: str,
+ provider: str,
+) -> Optional[str]:
+ """
+ Check if a beta header is supported by a provider.
+
+ Note: This does NOT handle header transformations/mappings.
+ Those are handled in each provider's transformation code.
+
+ Args:
+ anthropic_beta_header: The Anthropic beta header value
+ provider: Provider name
+
+ Returns:
+ The original header if supported, or None if unsupported
+ """
+ config = _load_beta_headers_config()
+ provider = get_provider_name(provider)
+
+ # Check if unsupported
+ unsupported_headers = set(config.get(provider, []))
+ if anthropic_beta_header in unsupported_headers:
+ return None
+
+ return anthropic_beta_header
+
+
+def update_headers_with_filtered_beta(
+ headers: dict,
+ provider: str,
+) -> dict:
+ """
+ Update headers dict by filtering and transforming anthropic-beta header values.
+ Modifies the headers dict in place and returns it.
+
+ Args:
+ headers: Request headers dict (will be modified in place)
+ provider: Provider name
+
+ Returns:
+ Updated headers dict
+ """
+ existing_beta = headers.get("anthropic-beta")
+ if not existing_beta:
+ return headers
+
+ # Parse existing beta headers
+ beta_values = [b.strip() for b in existing_beta.split(",") if b.strip()]
+
+ # Filter and transform based on provider
+ filtered_beta_values = filter_and_transform_beta_headers(
+ beta_headers=beta_values,
+ provider=provider,
+ )
+
+ # Update or remove the header
+ if filtered_beta_values:
+ headers["anthropic-beta"] = ",".join(filtered_beta_values)
+ else:
+ # Remove the header if no values remain
+ headers.pop("anthropic-beta", None)
+
+ return headers
+
+
+def get_unsupported_headers(provider: str) -> List[str]:
+ """
+ Get all beta headers that are unsupported by a provider.
+
+ Args:
+ provider: Provider name
+
+ Returns:
+ List of unsupported Anthropic beta header names
+ """
+ config = _load_beta_headers_config()
+ provider = get_provider_name(provider)
+ return config.get(provider, [])
diff --git a/litellm/litellm_core_utils/core_helpers.py b/litellm/litellm_core_utils/core_helpers.py
index 00695cbfb5b..7c8e2ebeaff 100644
--- a/litellm/litellm_core_utils/core_helpers.py
+++ b/litellm/litellm_core_utils/core_helpers.py
@@ -94,8 +94,10 @@ def map_finish_reason(
         return "length"
     elif finish_reason == "tool_use": # anthropic
         return "tool_calls"
     elif finish_reason == "content_filtered":
         return "content_filter"
+    elif finish_reason == "compaction": # anthropic context compaction
+        return "length"
     return finish_reason
diff --git a/litellm/litellm_core_utils/prompt_templates/factory.py b/litellm/litellm_core_utils/prompt_templates/factory.py
index 53d2ca2f23f..f9ecd78ff1c 100644
--- a/litellm/litellm_core_utils/prompt_templates/factory.py
+++ b/litellm/litellm_core_utils/prompt_templates/factory.py
@@ -2190,6 +2190,16 @@ def anthropic_messages_pt( # noqa: PLR0915
while msg_i < len(messages) and messages[msg_i]["role"] == "assistant":
assistant_content_block: ChatCompletionAssistantMessage = messages[msg_i] # type: ignore
+ # Extract compaction_blocks from provider_specific_fields and add them first
+ _provider_specific_fields_raw = assistant_content_block.get(
+ "provider_specific_fields"
+ )
+ if isinstance(_provider_specific_fields_raw, dict):
+ _compaction_blocks = _provider_specific_fields_raw.get("compaction_blocks")
+ if _compaction_blocks and isinstance(_compaction_blocks, list):
+ # Add compaction blocks at the beginning of assistant content : https://platform.claude.com/docs/en/build-with-claude/compaction
+ assistant_content.extend(_compaction_blocks) # type: ignore
+
thinking_blocks = assistant_content_block.get("thinking_blocks", None)
if (
thinking_blocks is not None
diff --git a/litellm/llms/anthropic/chat/handler.py b/litellm/llms/anthropic/chat/handler.py
index 6a9aafd076b..485e95d6489 100644
--- a/litellm/llms/anthropic/chat/handler.py
+++ b/litellm/llms/anthropic/chat/handler.py
@@ -512,6 +512,9 @@ def __init__(
# Accumulate web_search_tool_result blocks for multi-turn reconstruction
# See: https://github.com/BerriAI/litellm/issues/17737
self.web_search_results: List[Dict[str, Any]] = []
+
+ # Accumulate compaction blocks for multi-turn reconstruction
+ self.compaction_blocks: List[Dict[str, Any]] = []
def check_empty_tool_call_args(self) -> bool:
"""
@@ -592,6 +595,12 @@ def _content_block_delta_helper(self, chunk: dict) -> Tuple[
)
]
provider_specific_fields["thinking_blocks"] = thinking_blocks
+ elif "content" in content_block["delta"] and content_block["delta"].get("type") == "compaction_delta":
+ # Handle compaction delta
+ provider_specific_fields["compaction_delta"] = {
+ "type": "compaction_delta",
+ "content": content_block["delta"]["content"]
+ }
return text, tool_use, thinking_blocks, provider_specific_fields
@@ -721,6 +730,20 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream: # noqa: PLR0915
provider_specific_fields=provider_specific_fields,
)
+ elif content_block_start["content_block"]["type"] == "compaction":
+ # Handle compaction blocks
+ # The full content comes in content_block_start
+ self.compaction_blocks.append(
+ content_block_start["content_block"]
+ )
+ provider_specific_fields["compaction_blocks"] = (
+ self.compaction_blocks
+ )
+ provider_specific_fields["compaction_start"] = {
+ "type": "compaction",
+ "content": content_block_start["content_block"].get("content", "")
+ }
+
elif content_block_start["content_block"]["type"].endswith("_tool_result"):
# Handle all tool result types (web_search, bash_code_execution, text_editor, etc.)
content_type = content_block_start["content_block"]["type"]
diff --git a/litellm/llms/anthropic/chat/transformation.py b/litellm/llms/anthropic/chat/transformation.py
index 1b61b533275..02b8d952445 100644
--- a/litellm/llms/anthropic/chat/transformation.py
+++ b/litellm/llms/anthropic/chat/transformation.py
@@ -170,9 +170,11 @@ def convert_tool_use_to_openai_format(
             tool_call["caller"] = cast(Dict[str, Any], anthropic_tool_content["caller"]) # type: ignore[typeddict-item]
         return tool_call

-    def _is_claude_opus_4_5(self, model: str) -> bool:
-        """Check if the model is Claude Opus 4.5."""
-        return "opus-4-5" in model.lower() or "opus_4_5" in model.lower()
+    @staticmethod
+    def _is_claude_opus_4_6(model: str) -> bool:
+        """Check if the model is Claude Opus 4.6."""
+        return "opus-4-6" in model.lower() or "opus_4_6" in model.lower()
def get_supported_openai_params(self, model: str):
params = [
@@ -659,32 +660,38 @@ def _map_stop_sequences(
@staticmethod
def _map_reasoning_effort(
- reasoning_effort: Optional[Union[REASONING_EFFORT, str]],
+ reasoning_effort: Optional[Union[REASONING_EFFORT, str]],
+ model: str,
) -> Optional[AnthropicThinkingParam]:
- if reasoning_effort is None:
- return None
- elif reasoning_effort == "low":
- return AnthropicThinkingParam(
- type="enabled",
- budget_tokens=DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
- )
- elif reasoning_effort == "medium":
- return AnthropicThinkingParam(
- type="enabled",
- budget_tokens=DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET,
- )
- elif reasoning_effort == "high":
- return AnthropicThinkingParam(
- type="enabled",
- budget_tokens=DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET,
- )
- elif reasoning_effort == "minimal":
+ if AnthropicConfig._is_claude_opus_4_6(model):
return AnthropicThinkingParam(
- type="enabled",
- budget_tokens=DEFAULT_REASONING_EFFORT_MINIMAL_THINKING_BUDGET,
+ type="adaptive",
)
else:
- raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}")
+ if reasoning_effort is None:
+ return None
+ elif reasoning_effort == "low":
+ return AnthropicThinkingParam(
+ type="enabled",
+ budget_tokens=DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
+ )
+ elif reasoning_effort == "medium":
+ return AnthropicThinkingParam(
+ type="enabled",
+ budget_tokens=DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET,
+ )
+ elif reasoning_effort == "high":
+ return AnthropicThinkingParam(
+ type="enabled",
+ budget_tokens=DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET,
+ )
+ elif reasoning_effort == "minimal":
+ return AnthropicThinkingParam(
+ type="enabled",
+ budget_tokens=DEFAULT_REASONING_EFFORT_MINIMAL_THINKING_BUDGET,
+ )
+ else:
+ raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}")
def _extract_json_schema_from_response_format(
self, value: Optional[dict]
@@ -860,13 +867,8 @@ def map_openai_params( # noqa: PLR0915
if param == "thinking":
optional_params["thinking"] = value
elif param == "reasoning_effort" and isinstance(value, str):
- # For Claude Opus 4.5, map reasoning_effort to output_config
- if self._is_claude_opus_4_5(model):
- optional_params["output_config"] = {"effort": value}
-
- # For other models, map to thinking parameter
optional_params["thinking"] = AnthropicConfig._map_reasoning_effort(
- value
+ reasoning_effort=value, model=model
)
elif param == "web_search_options" and isinstance(value, dict):
hosted_web_search_tool = self.map_web_search_tool(
@@ -877,6 +879,9 @@ def map_openai_params( # noqa: PLR0915
)
elif param == "extra_headers":
optional_params["extra_headers"] = value
+ elif param == "context_management" and isinstance(value, dict):
+ # Pass through Anthropic-specific context_management parameter
+ optional_params["context_management"] = value
## handle thinking tokens
self.update_optional_params_with_thinking_tokens(
@@ -1026,9 +1031,37 @@ def _ensure_beta_header(self, headers: dict, beta_value: str) -> None:
if beta_value not in existing_values:
headers["anthropic-beta"] = f"{existing_beta}, {beta_value}"
- def _ensure_context_management_beta_header(self, headers: dict) -> None:
- beta_value = ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
- self._ensure_beta_header(headers, beta_value)
+ def _ensure_context_management_beta_header(
+ self, headers: dict, context_management: dict
+ ) -> None:
+ """
+ Add appropriate beta headers based on context_management edits.
+ - If any edit has type "compact_20260112", add compact-2026-01-12 header
+ - For all other edits, add context-management-2025-06-27 header
+ """
+ edits = context_management.get("edits", [])
+
+ has_compact = False
+ has_other = False
+
+ for edit in edits:
+ edit_type = edit.get("type", "")
+ if edit_type == "compact_20260112":
+ has_compact = True
+ else:
+ has_other = True
+
+ # Add compact header if any compact edits exist
+ if has_compact:
+ self._ensure_beta_header(
+ headers, ANTHROPIC_BETA_HEADER_VALUES.COMPACT_2026_01_12.value
+ )
+
+ # Add context management header if any other edits exist
+ if has_other:
+ self._ensure_beta_header(
+ headers, ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
+ )
def update_headers_with_optional_anthropic_beta(
self, headers: dict, optional_params: dict
@@ -1056,7 +1089,9 @@ def update_headers_with_optional_anthropic_beta(
headers, ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
)
if optional_params.get("context_management") is not None:
- self._ensure_context_management_beta_header(headers)
+ self._ensure_context_management_beta_header(
+ headers, optional_params["context_management"]
+ )
if optional_params.get("output_format") is not None:
self._ensure_beta_header(
headers, ANTHROPIC_BETA_HEADER_VALUES.STRUCTURED_OUTPUT_2025_09_25.value
@@ -1225,6 +1260,7 @@ def extract_response_content(self, completion_response: dict) -> Tuple[
List[ChatCompletionToolCallChunk],
Optional[List[Any]],
Optional[List[Any]],
+ Optional[List[Any]],
]:
text_content = ""
citations: Optional[List[Any]] = None
@@ -1237,6 +1273,7 @@ def extract_response_content(self, completion_response: dict) -> Tuple[
tool_calls: List[ChatCompletionToolCallChunk] = []
web_search_results: Optional[List[Any]] = None
tool_results: Optional[List[Any]] = None
+ compaction_blocks: Optional[List[Any]] = None
for idx, content in enumerate(completion_response["content"]):
if content["type"] == "text":
text_content += content["text"]
@@ -1278,6 +1315,12 @@ def extract_response_content(self, completion_response: dict) -> Tuple[
thinking_blocks.append(
cast(ChatCompletionRedactedThinkingBlock, content)
)
+
+ ## COMPACTION
+ elif content["type"] == "compaction":
+ if compaction_blocks is None:
+ compaction_blocks = []
+ compaction_blocks.append(content)
## CITATIONS
if content.get("citations") is not None:
@@ -1299,7 +1342,7 @@ def extract_response_content(self, completion_response: dict) -> Tuple[
if thinking_content is not None:
reasoning_content += thinking_content
- return text_content, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results
+ return text_content, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results, compaction_blocks
def calculate_usage(
self,
@@ -1316,6 +1359,10 @@ def calculate_usage(
cache_creation_token_details: Optional[CacheCreationTokenDetails] = None
web_search_requests: Optional[int] = None
tool_search_requests: Optional[int] = None
+ inference_geo: Optional[str] = None
+ if "inference_geo" in _usage and _usage["inference_geo"] is not None:
+ inference_geo = _usage["inference_geo"]
+
if (
"cache_creation_input_tokens" in _usage
and _usage["cache_creation_input_tokens"] is not None
@@ -1399,6 +1446,7 @@ def calculate_usage(
if (web_search_requests is not None or tool_search_requests is not None)
else None
),
+ inference_geo=inference_geo,
)
return usage
@@ -1442,6 +1490,7 @@ def transform_parsed_response(
tool_calls,
web_search_results,
tool_results,
+ compaction_blocks,
) = self.extract_response_content(completion_response=completion_response)
if (
@@ -1469,6 +1518,8 @@ def transform_parsed_response(
provider_specific_fields["tool_results"] = tool_results
if container is not None:
provider_specific_fields["container"] = container
+ if compaction_blocks is not None:
+ provider_specific_fields["compaction_blocks"] = compaction_blocks
_message = litellm.Message(
tool_calls=tool_calls,
@@ -1477,6 +1528,7 @@ def transform_parsed_response(
thinking_blocks=thinking_blocks,
reasoning_content=reasoning_content,
)
+ _message.provider_specific_fields = provider_specific_fields
## HANDLE JSON MODE - anthropic returns single function call
json_mode_message = self._transform_response_for_json_mode(
@@ -1507,18 +1559,7 @@ def transform_parsed_response(
model_response.created = int(time.time())
model_response.model = completion_response["model"]
- context_management_response = completion_response.get("context_management")
- if context_management_response is not None:
- _hidden_params["context_management"] = context_management_response
- try:
- model_response.__dict__["context_management"] = (
- context_management_response
- )
- except Exception:
- pass
-
model_response._hidden_params = _hidden_params
-
return model_response
def get_prefix_prompt(self, messages: List[AllMessageValues]) -> Optional[str]:
diff --git a/litellm/llms/anthropic/cost_calculation.py b/litellm/llms/anthropic/cost_calculation.py
index 8f34eb00ce5..11b61cc92f0 100644
--- a/litellm/llms/anthropic/cost_calculation.py
+++ b/litellm/llms/anthropic/cost_calculation.py
@@ -22,10 +22,17 @@ def cost_per_token(model: str, usage: "Usage") -> Tuple[float, float]:
Returns:
Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
"""
- return generic_cost_per_token(
- model=model, usage=usage, custom_llm_provider="anthropic"
+ # If usage has inference_geo, prepend it as prefix to model name
+ if hasattr(usage, "inference_geo") and usage.inference_geo and usage.inference_geo.lower() not in ["global", "not_available"]:
+ model_with_geo_prefix = f"{usage.inference_geo}/{model}"
+ else:
+ model_with_geo_prefix = model
+ prompt_cost, completion_cost = generic_cost_per_token(
+ model=model_with_geo_prefix, usage=usage, custom_llm_provider="anthropic"
)
+ return prompt_cost, completion_cost
+
def get_cost_for_anthropic_web_search(
model_info: Optional["ModelInfo"] = None,
diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py b/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py
index 308bf367d06..bb40f9df266 100644
--- a/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py
+++ b/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py
@@ -2,6 +2,9 @@
import httpx
+from litellm.anthropic_beta_headers_manager import (
+ update_headers_with_filtered_beta,
+)
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.litellm_core_utils.litellm_logging import verbose_logger
from litellm.llms.base_llm.anthropic_messages.transformation import (
@@ -90,6 +93,11 @@ def validate_anthropic_messages_environment(
optional_params=optional_params,
)
+ headers = update_headers_with_filtered_beta(
+ headers=headers,
+ provider="anthropic",
+ )
+
return headers, api_base
def transform_anthropic_messages_request(
@@ -189,8 +197,27 @@ def _update_headers_with_anthropic_beta(
beta_values.update(b.strip() for b in existing_beta.split(","))
# Check for context management
- if optional_params.get("context_management") is not None:
- beta_values.add(ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value)
+ context_management_param = optional_params.get("context_management")
+ if context_management_param is not None:
+ # Check edits array for compact_20260112 type
+ edits = context_management_param.get("edits", [])
+ has_compact = False
+ has_other = False
+
+ for edit in edits:
+ edit_type = edit.get("type", "")
+ if edit_type == "compact_20260112":
+ has_compact = True
+ else:
+ has_other = True
+
+ # Add compact header if any compact edits exist
+ if has_compact:
+ beta_values.add(ANTHROPIC_BETA_HEADER_VALUES.COMPACT_2026_01_12.value)
+
+ # Add context management header if any other edits exist
+ if has_other:
+ beta_values.add(ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value)
# Check for structured outputs
if optional_params.get("output_format") is not None:
diff --git a/litellm/llms/azure_ai/anthropic/transformation.py b/litellm/llms/azure_ai/anthropic/transformation.py
index 2d8d3b987c7..753bc9c08eb 100644
--- a/litellm/llms/azure_ai/anthropic/transformation.py
+++ b/litellm/llms/azure_ai/anthropic/transformation.py
@@ -3,6 +3,9 @@
"""
from typing import TYPE_CHECKING, Dict, List, Optional, Union
+from litellm.anthropic_beta_headers_manager import (
+ update_headers_with_filtered_beta,
+)
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
from litellm.llms.azure.common_utils import BaseAzureLLM
from litellm.types.llms.openai import AllMessageValues
@@ -87,6 +90,12 @@ def validate_environment(
if "anthropic-version" not in headers:
headers["anthropic-version"] = "2023-06-01"
+ # Filter out unsupported beta headers for Azure AI
+ headers = update_headers_with_filtered_beta(
+ headers=headers,
+ provider="azure_ai",
+ )
+
return headers
def transform_request(
diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py
index 6591e152a14..7fc51263ebb 100644
--- a/litellm/llms/bedrock/chat/converse_transformation.py
+++ b/litellm/llms/bedrock/chat/converse_transformation.py
@@ -11,6 +11,9 @@
import litellm
from litellm._logging import verbose_logger
+from litellm.anthropic_beta_headers_manager import (
+ filter_and_transform_beta_headers,
+)
from litellm.constants import RESPONSE_FORMAT_TOOL_NAME
from litellm.litellm_core_utils.core_helpers import (
filter_exceptions_from_params,
@@ -30,8 +33,6 @@
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
from litellm.types.llms.bedrock import *
-
-from ..common_utils import is_claude_4_5_on_bedrock
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionAssistantMessage,
@@ -68,6 +69,7 @@
BedrockModelInfo,
get_anthropic_beta_from_headers,
get_bedrock_tool_name,
+ is_claude_4_5_on_bedrock,
)
# Computer use tool prefixes supported by Bedrock
@@ -83,6 +85,7 @@
UNSUPPORTED_BEDROCK_CONVERSE_BETA_PATTERNS = [
"advanced-tool-use", # Bedrock Converse doesn't support advanced-tool-use beta headers
"prompt-caching", # Prompt caching not supported in Converse API
+ "compact-2026-01-12", # The compact beta feature is not currently supported on the Converse and ConverseStream APIs
]
@@ -431,7 +434,7 @@ def _handle_reasoning_effort_parameter(
else:
# Anthropic and other models: convert to thinking parameter
optional_params["thinking"] = AnthropicConfig._map_reasoning_effort(
- reasoning_effort
+ reasoning_effort=reasoning_effort, model=model
)
def get_supported_openai_params(self, model: str) -> List[str]:
@@ -617,37 +620,6 @@ def _transform_computer_use_tools(
return transformed_tools
- def _filter_unsupported_beta_headers_for_bedrock(
- self, model: str, beta_list: list
- ) -> list:
- """
- Remove beta headers that are not supported on Bedrock Converse API for the given model.
-
- Extended thinking beta headers are only supported on specific Claude 4+ models.
- Some beta headers are universally unsupported on Bedrock Converse API.
-
- Args:
- model: The model name
- beta_list: The list of beta headers to filter
-
- Returns:
- Filtered list of beta headers
- """
- filtered_betas = []
-
- # 1. Filter out beta headers that are universally unsupported on Bedrock Converse
- for beta in beta_list:
- should_keep = True
- for unsupported_pattern in UNSUPPORTED_BEDROCK_CONVERSE_BETA_PATTERNS:
- if unsupported_pattern in beta.lower():
- should_keep = False
- break
-
- if should_keep:
- filtered_betas.append(beta)
-
- return filtered_betas
-
def _separate_computer_use_tools(
self, tools: List[OpenAIChatCompletionToolParam], model: str
) -> Tuple[
@@ -1124,7 +1096,28 @@ def _process_tools_and_beta(
# Add computer use tools and anthropic_beta if needed (only when computer use tools are present)
if computer_use_tools:
- anthropic_beta_list.append("computer-use-2024-10-22")
+ # Determine the correct computer-use beta header based on model
+ # "computer-use-2025-11-24" for Claude Opus 4.6, Claude Opus 4.5
+ # "computer-use-2025-01-24" for Claude Sonnet 4.5, Haiku 4.5, Opus 4.1, Sonnet 4, Opus 4, and Sonnet 3.7
+ # "computer-use-2024-10-22" for older models
+ model_lower = model.lower()
+ if "opus-4.6" in model_lower or "opus_4.6" in model_lower or "opus-4-6" in model_lower or "opus_4_6" in model_lower:
+ computer_use_header = "computer-use-2025-11-24"
+ elif "opus-4.5" in model_lower or "opus_4.5" in model_lower or "opus-4-5" in model_lower or "opus_4_5" in model_lower:
+ computer_use_header = "computer-use-2025-11-24"
+ elif any(pattern in model_lower for pattern in [
+ "sonnet-4.5", "sonnet_4.5", "sonnet-4-5", "sonnet_4_5",
+ "haiku-4.5", "haiku_4.5", "haiku-4-5", "haiku_4_5",
+ "opus-4.1", "opus_4.1", "opus-4-1", "opus_4_1",
+ "sonnet-4", "sonnet_4",
+ "opus-4", "opus_4",
+ "sonnet-3.7", "sonnet_3.7", "sonnet-3-7", "sonnet_3_7"
+ ]):
+ computer_use_header = "computer-use-2025-01-24"
+ else:
+ computer_use_header = "computer-use-2024-10-22"
+
+ anthropic_beta_list.append(computer_use_header)
# Transform computer use tools to proper Bedrock format
transformed_computer_tools = self._transform_computer_use_tools(
computer_use_tools
@@ -1150,13 +1143,13 @@ def _process_tools_and_beta(
unique_betas.append(beta)
seen.add(beta)
- # Filter out unsupported beta headers for Bedrock Converse API
- filtered_betas = self._filter_unsupported_beta_headers_for_bedrock(
- model=model,
- beta_list=unique_betas,
+ filtered_betas = filter_and_transform_beta_headers(
+ beta_headers=unique_betas,
+ provider="bedrock_converse",
)
-
- additional_request_params["anthropic_beta"] = filtered_betas
+
+ if filtered_betas:
+ additional_request_params["anthropic_beta"] = filtered_betas
return bedrock_tools, anthropic_beta_list
diff --git a/litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py b/litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py
index 90de67a822f..19fe7d8c140 100644
--- a/litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py
+++ b/litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py
@@ -12,6 +12,9 @@
import httpx
+from litellm.anthropic_beta_headers_manager import (
+ filter_and_transform_beta_headers,
+)
from litellm.llms.anthropic.common_utils import AnthropicModelInfo
from litellm.llms.anthropic.experimental_pass_through.messages.transformation import (
AnthropicMessagesConfig,
@@ -55,10 +58,6 @@ class AmazonAnthropicClaudeMessagesConfig(
# Beta header patterns that are not supported by Bedrock Invoke API
# These will be filtered out to prevent 400 "invalid beta flag" errors
- UNSUPPORTED_BEDROCK_INVOKE_BETA_PATTERNS = [
- "advanced-tool-use", # Bedrock Invoke doesn't support advanced-tool-use beta headers
- "prompt-caching-scope",
- ]
def __init__(self, **kwargs):
BaseAnthropicMessagesConfig.__init__(self, **kwargs)
@@ -276,39 +275,48 @@ def _filter_unsupported_beta_headers_for_bedrock(
model: The model name
beta_set: The set of beta headers to filter in-place
"""
- beta_headers_to_remove = set()
- has_advanced_tool_use = False
-
- # 1. Filter out beta headers that are universally unsupported on Bedrock Invoke and track if advanced-tool-use header is present
- for beta in beta_set:
- for unsupported_pattern in self.UNSUPPORTED_BEDROCK_INVOKE_BETA_PATTERNS:
- if unsupported_pattern in beta.lower():
- beta_headers_to_remove.add(beta)
- has_advanced_tool_use = True
- break
-
- # 2. Filter out extended thinking headers for models that don't support them
+ # 1. Handle header transformations BEFORE filtering
+ # (advanced-tool-use -> tool-search-tool)
+ # This must happen before filtering because advanced-tool-use is in the unsupported list
+ has_advanced_tool_use = "advanced-tool-use-2025-11-20" in beta_set
+ if has_advanced_tool_use and self._supports_tool_search_on_bedrock(model):
+ beta_set.discard("advanced-tool-use-2025-11-20")
+ beta_set.add("tool-search-tool-2025-10-19")
+ beta_set.add("tool-examples-2025-10-29")
+
+ # 2. Apply provider-level filtering using centralized JSON config
+ beta_list = list(beta_set)
+ filtered_list = filter_and_transform_beta_headers(
+ beta_headers=beta_list,
+ provider="bedrock",
+ )
+
+ # Update the set with filtered headers
+ beta_set.clear()
+ beta_set.update(filtered_list)
+
+ # 2.1. Handle model-specific exceptions: structured-outputs is only supported on Opus 4.6
+ # Re-add structured-outputs if it was in the original set and model is Opus 4.6
+ model_lower = model.lower()
+ is_opus_4_6 = any(pattern in model_lower for pattern in ["opus-4.6", "opus_4.6", "opus-4-6", "opus_4_6"])
+ if is_opus_4_6 and "structured-outputs-2025-11-13" in beta_list:
+ beta_set.add("structured-outputs-2025-11-13")
+
+ # 3. Filter out extended thinking headers for models that don't support them
extended_thinking_patterns = [
"extended-thinking",
"interleaved-thinking",
]
if not self._supports_extended_thinking_on_bedrock(model):
+ beta_headers_to_remove = set()
for beta in beta_set:
for pattern in extended_thinking_patterns:
if pattern in beta.lower():
beta_headers_to_remove.add(beta)
break
-
- # Remove all filtered headers
- for beta in beta_headers_to_remove:
- beta_set.discard(beta)
-
- # 3. Translate advanced-tool-use to Bedrock-specific headers for models that support tool search
- # Ref: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages-request-response.html
- # Ref: https://platform.claude.com/docs/en/agents-and-tools/tool-use/tool-search-tool
- if has_advanced_tool_use and self._supports_tool_search_on_bedrock(model):
- beta_set.add("tool-search-tool-2025-10-19")
- beta_set.add("tool-examples-2025-10-29")
+
+ for beta in beta_headers_to_remove:
+ beta_set.discard(beta)
def _get_tool_search_beta_header_for_bedrock(
self,
diff --git a/litellm/llms/databricks/chat/transformation.py b/litellm/llms/databricks/chat/transformation.py
index 2b7f5dd5995..e9ae94307d4 100644
--- a/litellm/llms/databricks/chat/transformation.py
+++ b/litellm/llms/databricks/chat/transformation.py
@@ -298,7 +298,8 @@ def map_openai_params(
if "reasoning_effort" in non_default_params and "claude" in model:
optional_params["thinking"] = AnthropicConfig._map_reasoning_effort(
- non_default_params.get("reasoning_effort")
+ reasoning_effort=non_default_params.get("reasoning_effort"),
+ model=model
)
optional_params.pop("reasoning_effort", None)
## handle thinking tokens
diff --git a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py
index 9b8ff3ecc2d..918b8ecc225 100644
--- a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py
+++ b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py
@@ -1,5 +1,8 @@
from typing import Any, Dict, List, Optional, Tuple
+from litellm.anthropic_beta_headers_manager import (
+ update_headers_with_filtered_beta,
+)
from litellm.llms.anthropic.common_utils import AnthropicModelInfo
from litellm.llms.anthropic.experimental_pass_through.messages.transformation import (
AnthropicMessagesConfig,
@@ -7,7 +10,6 @@
from litellm.types.llms.anthropic import (
ANTHROPIC_BETA_HEADER_VALUES,
ANTHROPIC_HOSTED_TOOLS,
- ANTHROPIC_PROMPT_CACHING_SCOPE_BETA_HEADER,
)
from litellm.types.llms.anthropic_tool_search import get_tool_search_beta_header
from litellm.types.llms.vertex_ai import VertexPartnerProvider
@@ -65,10 +67,6 @@ def validate_anthropic_messages_environment(
existing_beta = headers.get("anthropic-beta")
if existing_beta:
beta_values.update(b.strip() for b in existing_beta.split(","))
-
- # Use the helper to remove unsupported beta headers
- self.remove_unsupported_beta(headers)
- beta_values.discard(ANTHROPIC_PROMPT_CACHING_SCOPE_BETA_HEADER)
# Check for web search tool
for tool in tools:
@@ -84,6 +82,12 @@ def validate_anthropic_messages_environment(
if beta_values:
headers["anthropic-beta"] = ",".join(beta_values)
+ # Filter out unsupported beta headers for Vertex AI
+ headers = update_headers_with_filtered_beta(
+ headers=headers,
+ provider="vertex_ai",
+ )
+
return headers, api_base
def get_complete_url(
@@ -128,23 +132,3 @@ def transform_anthropic_messages_request(
) # do not pass output_format in request body to vertex ai - vertex ai does not support output_format as yet
return anthropic_messages_request
-
- def remove_unsupported_beta(self, headers: dict) -> None:
- """
- Helper method to remove unsupported beta headers from the beta headers.
- Modifies headers in place.
- """
- unsupported_beta_headers = [
- ANTHROPIC_PROMPT_CACHING_SCOPE_BETA_HEADER
- ]
- existing_beta = headers.get("anthropic-beta")
- if existing_beta:
- filtered_beta = [
- b.strip()
- for b in existing_beta.split(",")
- if b.strip() not in unsupported_beta_headers
- ]
- if filtered_beta:
- headers["anthropic-beta"] = ",".join(filtered_beta)
- elif "anthropic-beta" in headers:
- del headers["anthropic-beta"]
diff --git a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py
index 1df07f405e6..0b728d88e76 100644
--- a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py
+++ b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py
@@ -51,6 +51,40 @@ class VertexAIAnthropicConfig(AnthropicConfig):
def custom_llm_provider(self) -> Optional[str]:
return "vertex_ai"
+ def _add_context_management_beta_headers(
+ self, beta_set: set, context_management: dict
+ ) -> None:
+ """
+ Add context_management beta headers to the beta_set.
+
+ - If any edit has type "compact_20260112", add compact-2026-01-12 header
+ - For all other edits, add context-management-2025-06-27 header
+
+ Args:
+ beta_set: Set of beta headers to modify in-place
+ context_management: The context_management dict from optional_params
+ """
+ from litellm.types.llms.anthropic import ANTHROPIC_BETA_HEADER_VALUES
+
+ edits = context_management.get("edits", [])
+ has_compact = False
+ has_other = False
+
+ for edit in edits:
+ edit_type = edit.get("type", "")
+ if edit_type == "compact_20260112":
+ has_compact = True
+ else:
+ has_other = True
+
+ # Add compact header if any compact edits exist
+ if has_compact:
+ beta_set.add(ANTHROPIC_BETA_HEADER_VALUES.COMPACT_2026_01_12.value)
+
+ # Add context management header if any other edits exist
+ if has_other:
+ beta_set.add(ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value)
+
def transform_request(
self,
model: str,
@@ -86,6 +120,11 @@ def transform_request(
beta_set = set(auto_betas)
if tool_search_used:
beta_set.add("tool-search-tool-2025-10-19") # Vertex requires this header for tool search
+
+ # Add context_management beta headers (compact and/or context-management)
+ context_management = optional_params.get("context_management")
+ if context_management:
+ self._add_context_management_beta_headers(beta_set, context_management)
if beta_set:
data["anthropic_beta"] = list(beta_set)
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 04ad5ed0c1c..0da47634a94 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -963,7 +963,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 159
},
- "anthropic.claude-opus-4-6-v1:0": {
+ "anthropic.claude-opus-4-6-v1": {
"cache_creation_input_token_cost": 6.25e-06,
"cache_creation_input_token_cost_above_200k_tokens": 1.25e-05,
"cache_read_input_token_cost": 5e-07,
@@ -1023,7 +1023,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346
},
- "global.anthropic.claude-opus-4-6-v1:0": {
+ "global.anthropic.claude-opus-4-6-v1": {
"cache_creation_input_token_cost": 6.25e-06,
"cache_creation_input_token_cost_above_200k_tokens": 1.25e-05,
"cache_read_input_token_cost": 5e-07,
@@ -1143,7 +1143,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346
},
- "eu.anthropic.claude-opus-4-6-v1:0": {
+ "eu.anthropic.claude-opus-4-6-v1": {
"cache_creation_input_token_cost": 6.875e-06,
"cache_creation_input_token_cost_above_200k_tokens": 1.375e-05,
"cache_read_input_token_cost": 5.5e-07,
@@ -1203,7 +1203,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346
},
- "apac.anthropic.claude-opus-4-6-v1:0": {
+ "apac.anthropic.claude-opus-4-6-v1": {
"cache_creation_input_token_cost": 6.875e-06,
"cache_creation_input_token_cost_above_200k_tokens": 1.375e-05,
"cache_read_input_token_cost": 5.5e-07,
@@ -3494,29 +3494,6 @@
"supports_tool_choice": true,
"supports_vision": true
},
- "azure/gpt-5-search-api": {
- "cache_read_input_token_cost": 1.25e-07,
- "input_cost_per_token": 1.25e-06,
- "litellm_provider": "azure",
- "max_input_tokens": 128000,
- "max_output_tokens": 4096,
- "max_tokens": 4096,
- "mode": "chat",
- "output_cost_per_token": 1e-05,
- "search_context_cost_per_query": {
- "search_context_size_high": 0.05,
- "search_context_size_low": 0.03,
- "search_context_size_medium": 0.035
- },
- "supports_function_calling": true,
- "supports_parallel_function_calling": true,
- "supports_prompt_caching": true,
- "supports_response_schema": true,
- "supports_system_messages": true,
- "supports_tool_choice": true,
- "supports_vision": true,
- "supports_web_search": true
- },
"azure/gpt-5-2025-08-07": {
"cache_read_input_token_cost": 1.25e-07,
"input_cost_per_token": 1.25e-06,
@@ -7836,6 +7813,37 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346
},
+ "us/claude-opus-4-6": {
+ "cache_creation_input_token_cost": 6.875e-06,
+ "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05,
+ "cache_creation_input_token_cost_above_1hr": 1.1e-05,
+ "cache_read_input_token_cost": 5.5e-07,
+ "cache_read_input_token_cost_above_200k_tokens": 1.1e-06,
+ "input_cost_per_token": 5.5e-06,
+ "input_cost_per_token_above_200k_tokens": 1.1e-05,
+ "litellm_provider": "anthropic",
+ "max_input_tokens": 200000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 2.75e-05,
+ "output_cost_per_token_above_200k_tokens": 4.125e-05,
+ "search_context_cost_per_query": {
+ "search_context_size_high": 0.01,
+ "search_context_size_low": 0.01,
+ "search_context_size_medium": 0.01
+ },
+ "supports_assistant_prefill": false,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "tool_use_system_prompt_tokens": 346
+ },
"claude-opus-4-6-20260205": {
"cache_creation_input_token_cost": 6.25e-06,
"cache_creation_input_token_cost_above_200k_tokens": 1.25e-05,
@@ -7867,6 +7875,37 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346
},
+ "us/claude-opus-4-6-20260205": {
+ "cache_creation_input_token_cost": 6.875e-06,
+ "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05,
+ "cache_creation_input_token_cost_above_1hr": 1.1e-05,
+ "cache_read_input_token_cost": 5.5e-07,
+ "cache_read_input_token_cost_above_200k_tokens": 1.1e-06,
+ "input_cost_per_token": 5.5e-06,
+ "input_cost_per_token_above_200k_tokens": 1.1e-05,
+ "litellm_provider": "anthropic",
+ "max_input_tokens": 200000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 2.75e-05,
+ "output_cost_per_token_above_200k_tokens": 4.125e-05,
+ "search_context_cost_per_query": {
+ "search_context_size_high": 0.01,
+ "search_context_size_low": 0.01,
+ "search_context_size_medium": 0.01
+ },
+ "supports_assistant_prefill": false,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "tool_use_system_prompt_tokens": 346
+ },
"claude-sonnet-4-20250514": {
"deprecation_date": "2026-05-14",
"cache_creation_input_token_cost": 3.75e-06,
@@ -18758,29 +18797,6 @@
"supports_service_tier": true,
"supports_vision": true
},
- "gpt-5-search-api": {
- "cache_read_input_token_cost": 1.25e-07,
- "input_cost_per_token": 1.25e-06,
- "litellm_provider": "openai",
- "max_input_tokens": 128000,
- "max_output_tokens": 4096,
- "max_tokens": 4096,
- "mode": "chat",
- "output_cost_per_token": 1e-05,
- "search_context_cost_per_query": {
- "search_context_size_high": 0.05,
- "search_context_size_low": 0.03,
- "search_context_size_medium": 0.035
- },
- "supports_function_calling": true,
- "supports_parallel_function_calling": true,
- "supports_prompt_caching": true,
- "supports_response_schema": true,
- "supports_system_messages": true,
- "supports_tool_choice": true,
- "supports_vision": true,
- "supports_web_search": true
- },
"gpt-5.1": {
"cache_read_input_token_cost": 1.25e-07,
"cache_read_input_token_cost_priority": 2.5e-07,
diff --git a/litellm/types/llms/anthropic.py b/litellm/types/llms/anthropic.py
index 62e775d4faa..fedf419efd6 100644
--- a/litellm/types/llms/anthropic.py
+++ b/litellm/types/llms/anthropic.py
@@ -613,7 +613,7 @@ class AnthropicChatCompletionUsageBlock(ChatCompletionUsageBlock, total=False):
class AnthropicThinkingParam(TypedDict, total=False):
- type: Literal["enabled"]
+ type: Literal["enabled", "adaptive"]
budget_tokens: int
@@ -633,6 +633,7 @@ class ANTHROPIC_BETA_HEADER_VALUES(str, Enum):
WEB_FETCH_2025_09_10 = "web-fetch-2025-09-10"
WEB_SEARCH_2025_03_05 = "web-search-2025-03-05"
CONTEXT_MANAGEMENT_2025_06_27 = "context-management-2025-06-27"
+ COMPACT_2026_01_12 = "compact-2026-01-12"
STRUCTURED_OUTPUT_2025_09_25 = "structured-outputs-2025-11-13"
ADVANCED_TOOL_USE_2025_11_20 = "advanced-tool-use-2025-11-20"
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index a3e1edb4ae5..0da47634a94 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -963,7 +963,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 159
},
- "anthropic.claude-opus-4-6-v1:0": {
+ "anthropic.claude-opus-4-6-v1": {
"cache_creation_input_token_cost": 6.25e-06,
"cache_creation_input_token_cost_above_200k_tokens": 1.25e-05,
"cache_read_input_token_cost": 5e-07,
@@ -1023,7 +1023,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346
},
- "global.anthropic.claude-opus-4-6-v1:0": {
+ "global.anthropic.claude-opus-4-6-v1": {
"cache_creation_input_token_cost": 6.25e-06,
"cache_creation_input_token_cost_above_200k_tokens": 1.25e-05,
"cache_read_input_token_cost": 5e-07,
@@ -1143,7 +1143,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346
},
- "eu.anthropic.claude-opus-4-6-v1:0": {
+ "eu.anthropic.claude-opus-4-6-v1": {
"cache_creation_input_token_cost": 6.875e-06,
"cache_creation_input_token_cost_above_200k_tokens": 1.375e-05,
"cache_read_input_token_cost": 5.5e-07,
@@ -1203,7 +1203,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346
},
- "apac.anthropic.claude-opus-4-6-v1:0": {
+ "apac.anthropic.claude-opus-4-6-v1": {
"cache_creation_input_token_cost": 6.875e-06,
"cache_creation_input_token_cost_above_200k_tokens": 1.375e-05,
"cache_read_input_token_cost": 5.5e-07,
@@ -7813,6 +7813,37 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346
},
+ "us/claude-opus-4-6": {
+ "cache_creation_input_token_cost": 6.875e-06,
+ "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05,
+ "cache_creation_input_token_cost_above_1hr": 1.1e-05,
+ "cache_read_input_token_cost": 5.5e-07,
+ "cache_read_input_token_cost_above_200k_tokens": 1.1e-06,
+ "input_cost_per_token": 5.5e-06,
+ "input_cost_per_token_above_200k_tokens": 1.1e-05,
+ "litellm_provider": "anthropic",
+ "max_input_tokens": 200000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 2.75e-05,
+ "output_cost_per_token_above_200k_tokens": 4.125e-05,
+ "search_context_cost_per_query": {
+ "search_context_size_high": 0.01,
+ "search_context_size_low": 0.01,
+ "search_context_size_medium": 0.01
+ },
+ "supports_assistant_prefill": false,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "tool_use_system_prompt_tokens": 346
+ },
"claude-opus-4-6-20260205": {
"cache_creation_input_token_cost": 6.25e-06,
"cache_creation_input_token_cost_above_200k_tokens": 1.25e-05,
@@ -7844,6 +7875,37 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346
},
+ "us/claude-opus-4-6-20260205": {
+ "cache_creation_input_token_cost": 6.875e-06,
+ "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05,
+ "cache_creation_input_token_cost_above_1hr": 1.1e-05,
+ "cache_read_input_token_cost": 5.5e-07,
+ "cache_read_input_token_cost_above_200k_tokens": 1.1e-06,
+ "input_cost_per_token": 5.5e-06,
+ "input_cost_per_token_above_200k_tokens": 1.1e-05,
+ "litellm_provider": "anthropic",
+ "max_input_tokens": 200000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 2.75e-05,
+ "output_cost_per_token_above_200k_tokens": 4.125e-05,
+ "search_context_cost_per_query": {
+ "search_context_size_high": 0.01,
+ "search_context_size_low": 0.01,
+ "search_context_size_medium": 0.01
+ },
+ "supports_assistant_prefill": false,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "tool_use_system_prompt_tokens": 346
+ },
"claude-sonnet-4-20250514": {
"deprecation_date": "2026-05-14",
"cache_creation_input_token_cost": 3.75e-06,
diff --git a/tests/test_litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py b/tests/test_litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py
index eee0b267fad..49db7367c67 100644
--- a/tests/test_litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py
+++ b/tests/test_litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py
@@ -185,7 +185,7 @@ def test_extract_response_content_with_citations():
},
}
- _, citations, _, _, _, _ , _= config.extract_response_content(completion_response)
+ _, citations, _, _, _, _, _, _ = config.extract_response_content(completion_response)
assert citations == [
[
{
@@ -342,7 +342,7 @@ def test_web_search_tool_result_extraction():
}
}
- text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results = config.extract_response_content(
+ text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results, compaction_blocks = config.extract_response_content(
completion_response
)
@@ -474,7 +474,7 @@ def test_multiple_web_search_tool_results():
]
}
- text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results = config.extract_response_content(
+ text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results, compaction_blocks = config.extract_response_content(
completion_response
)
@@ -817,59 +817,6 @@ def test_anthropic_chat_transform_request_includes_context_management():
assert result["context_management"] == _sample_context_management_payload()
-def test_transform_parsed_response_includes_context_management_metadata():
- import httpx
-
- from litellm.types.utils import ModelResponse
-
- config = AnthropicConfig()
- context_management_payload = {
- "applied_edits": [
- {
- "type": "clear_tool_uses_20250919",
- "cleared_tool_uses": 2,
- "cleared_input_tokens": 5000,
- }
- ]
- }
- completion_response = {
- "id": "msg_context_management_test",
- "type": "message",
- "role": "assistant",
- "model": "claude-sonnet-4-20250514",
- "content": [{"type": "text", "text": "Done."}],
- "stop_reason": "end_turn",
- "stop_sequence": None,
- "usage": {
- "input_tokens": 10,
- "cache_creation_input_tokens": 0,
- "cache_read_input_tokens": 0,
- "output_tokens": 5,
- },
- "context_management": context_management_payload,
- }
- raw_response = httpx.Response(
- status_code=200,
- headers={},
- )
- model_response = ModelResponse()
-
- result = config.transform_parsed_response(
- completion_response=completion_response,
- raw_response=raw_response,
- model_response=model_response,
- json_mode=False,
- prefix_prompt=None,
- )
-
- assert result.__dict__.get("context_management") == context_management_payload
- provider_fields = result.choices[0].message.provider_specific_fields
- assert (
- provider_fields
- and provider_fields["context_management"] == context_management_payload
- )
-
-
def test_anthropic_structured_output_beta_header():
from litellm.types.utils import CallTypes
from litellm.utils import return_raw_request
@@ -1043,7 +990,7 @@ def test_server_tool_use_in_response():
]
}
- text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results = config.extract_response_content(
+ text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results, compaction_blocks = config.extract_response_content(
completion_response
)
@@ -1171,7 +1118,7 @@ def test_tool_search_complete_response_parsing():
}
# Extract content
- text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results = config.extract_response_content(
+ text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results, compaction_blocks = config.extract_response_content(
completion_response
)
@@ -1291,7 +1238,7 @@ def test_caller_field_in_response():
"usage": {"input_tokens": 100, "output_tokens": 50}
}
- text, citations, thinking, reasoning, tool_calls, web_search_results, tool_results = config.extract_response_content(completion_response)
+ text, citations, thinking, reasoning, tool_calls, web_search_results, tool_results, compaction_blocks = config.extract_response_content(completion_response)
assert len(tool_calls) == 1
assert tool_calls[0]["id"] == "toolu_123"
@@ -1934,6 +1881,75 @@ def test_calculate_usage_completion_tokens_details_with_reasoning():
assert usage.completion_tokens == 500
+# ============ Reasoning Effort Tests ============
+
+
+def test_reasoning_effort_maps_to_adaptive_thinking_for_opus_4_6():
+ """
+ Test that reasoning_effort maps to adaptive thinking type for Claude Opus 4.6.
+
+ For Claude Opus 4.6, reasoning_effort should map to {"type": "adaptive"}
+ regardless of the effort level specified.
+ """
+ config = AnthropicConfig()
+
+ # Test with different reasoning_effort values - all should map to adaptive
+ for effort in ["low", "medium", "high", "minimal"]:
+ non_default_params = {"reasoning_effort": effort}
+ optional_params = {}
+
+ result = config.map_openai_params(
+ non_default_params=non_default_params,
+ optional_params=optional_params,
+ model="claude-opus-4-6-20250514",
+ drop_params=False
+ )
+
+ # Should map to adaptive thinking type
+ assert "thinking" in result
+ assert result["thinking"]["type"] == "adaptive"
+ # Should not have budget_tokens for adaptive type
+ assert "budget_tokens" not in result["thinking"]
+ # reasoning_effort should not be in the result (it's transformed to thinking)
+ assert "reasoning_effort" not in result
+
+
+def test_reasoning_effort_maps_to_budget_thinking_for_non_opus_4_6():
+ """
+ Test that reasoning_effort maps to budget-based thinking config for non-Opus 4.6 models.
+
+ For models other than Claude Opus 4.6, reasoning_effort should map to
+ thinking config with budget_tokens based on the effort level.
+ """
+ config = AnthropicConfig()
+
+ # Test with Claude Sonnet 4.5 (non-Opus 4.6 model)
+ test_cases = [
+ ("low", 1024), # DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET
+ ("medium", 2048), # DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET
+ ("high", 4096), # DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET
+ ("minimal", 128), # DEFAULT_REASONING_EFFORT_MINIMAL_THINKING_BUDGET
+ ]
+
+ for effort, expected_budget in test_cases:
+ non_default_params = {"reasoning_effort": effort}
+ optional_params = {}
+
+ result = config.map_openai_params(
+ non_default_params=non_default_params,
+ optional_params=optional_params,
+ model="claude-sonnet-4-5-20250929",
+ drop_params=False
+ )
+
+ # Should map to enabled thinking type with budget_tokens
+ assert "thinking" in result
+ assert result["thinking"]["type"] == "enabled"
+ assert result["thinking"]["budget_tokens"] == expected_budget
+ # reasoning_effort should not be in the result (it's transformed to thinking)
+ assert "reasoning_effort" not in result
+
+
def test_code_execution_tool_results_extraction():
"""
Test that code execution tool results (bash_code_execution_tool_result,
@@ -2174,3 +2190,319 @@ def test_web_search_tool_result_backwards_compatibility():
# Should NOT be in tool_results
assert provider_fields.get("tool_results") is None
+
+
+# ============ Compaction Tests ============
+
+
+def test_compaction_block_extraction():
+ """
+ Test that compaction blocks are correctly extracted from Anthropic response.
+ """
+ config = AnthropicConfig()
+
+ completion_response = {
+ "id": "msg_compaction_test",
+ "type": "message",
+ "role": "assistant",
+ "model": "claude-opus-4-6",
+ "content": [
+ {
+ "type": "compaction",
+ "content": "Summary of the conversation: The user requested help building a web scraper..."
+ },
+ {
+ "type": "text",
+ "text": "I don't have access to real-time data, so I can't provide the current weather in San Francisco."
+ }
+ ],
+ "stop_reason": "max_tokens",
+ "stop_sequence": None,
+ "usage": {
+ "input_tokens": 86,
+ "output_tokens": 100
+ }
+ }
+
+ text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results, compaction_blocks = config.extract_response_content(
+ completion_response
+ )
+
+ # Verify compaction blocks are extracted
+ assert compaction_blocks is not None
+ assert len(compaction_blocks) == 1
+ assert compaction_blocks[0]["type"] == "compaction"
+ assert "Summary of the conversation" in compaction_blocks[0]["content"]
+
+ # Verify text content is extracted
+ assert "I don't have access to real-time data" in text
+
+
+def test_compaction_block_in_provider_specific_fields():
+ """
+ Test that compaction blocks are included in provider_specific_fields.
+ """
+ import httpx
+
+ from litellm.types.utils import ModelResponse
+
+ config = AnthropicConfig()
+
+ completion_response = {
+ "id": "msg_compaction_provider_fields",
+ "type": "message",
+ "role": "assistant",
+ "model": "claude-opus-4-6",
+ "content": [
+ {
+ "type": "compaction",
+ "content": "Summary of the conversation: The user requested help building a web scraper..."
+ },
+ {
+ "type": "text",
+ "text": "Here is the response."
+ }
+ ],
+ "stop_reason": "end_turn",
+ "usage": {
+ "input_tokens": 50,
+ "output_tokens": 25
+ }
+ }
+
+ raw_response = httpx.Response(status_code=200, headers={})
+ model_response = ModelResponse()
+
+ result = config.transform_parsed_response(
+ completion_response=completion_response,
+ raw_response=raw_response,
+ model_response=model_response,
+ json_mode=False,
+ prefix_prompt=None,
+ )
+
+ # Verify compaction_blocks is in provider_specific_fields
+ provider_fields = result.choices[0].message.provider_specific_fields
+ assert provider_fields is not None
+ assert "compaction_blocks" in provider_fields
+ assert len(provider_fields["compaction_blocks"]) == 1
+ assert provider_fields["compaction_blocks"][0]["type"] == "compaction"
+ assert "Summary of the conversation" in provider_fields["compaction_blocks"][0]["content"]
+
+
+def test_multiple_compaction_blocks():
+ """
+ Test that multiple compaction blocks are all extracted.
+ """
+ config = AnthropicConfig()
+
+ completion_response = {
+ "content": [
+ {
+ "type": "compaction",
+ "content": "First summary..."
+ },
+ {
+ "type": "text",
+ "text": "Some text."
+ },
+ {
+ "type": "compaction",
+ "content": "Second summary..."
+ }
+ ]
+ }
+
+ text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results, compaction_blocks = config.extract_response_content(
+ completion_response
+ )
+
+ # Verify both compaction blocks are extracted
+ assert compaction_blocks is not None
+ assert len(compaction_blocks) == 2
+ assert compaction_blocks[0]["content"] == "First summary..."
+ assert compaction_blocks[1]["content"] == "Second summary..."
+
+
+def test_compaction_block_request_transformation():
+ """
+ Test that compaction blocks from provider_specific_fields are correctly
+ transformed back to Anthropic format in requests.
+ """
+ from litellm.litellm_core_utils.prompt_templates.factory import (
+ anthropic_messages_pt,
+ )
+
+ messages = [
+ {
+ "role": "user",
+ "content": "What is the weather in San Francisco?"
+ },
+ {
+ "role": "assistant",
+ "content": [
+ {
+ "type": "text",
+ "text": "I don't have access to real-time data."
+ }
+ ],
+ "provider_specific_fields": {
+ "compaction_blocks": [
+ {
+ "type": "compaction",
+ "content": "Summary of the conversation: The user requested help building a web scraper..."
+ }
+ ]
+ }
+ },
+ {
+ "role": "user",
+ "content": "What about New York?"
+ }
+ ]
+
+ result = anthropic_messages_pt(
+ messages=messages,
+ model="claude-opus-4-6",
+ llm_provider="anthropic"
+ )
+
+ # Find the assistant message
+ assistant_message = None
+ for msg in result:
+ if msg["role"] == "assistant":
+ assistant_message = msg
+ break
+
+ assert assistant_message is not None
+ assert "content" in assistant_message
+ assert isinstance(assistant_message["content"], list)
+
+ # Verify compaction block is at the beginning
+ assert assistant_message["content"][0]["type"] == "compaction"
+ assert "Summary of the conversation" in assistant_message["content"][0]["content"]
+
+ # Verify text content follows
+ text_blocks = [c for c in assistant_message["content"] if c.get("type") == "text"]
+ assert len(text_blocks) > 0
+ assert "I don't have access to real-time data" in text_blocks[0]["text"]
+
+
+def test_compaction_with_context_management():
+ """
+ Test that compaction works with context_management parameter.
+ """
+ config = AnthropicConfig()
+
+ messages = [{"role": "user", "content": "Hello"}]
+ optional_params = {
+ "context_management": {
+ "edits": [
+ {
+ "type": "compact_20260112"
+ }
+ ]
+ },
+ "max_tokens": 100
+ }
+
+ result = config.transform_request(
+ model="claude-opus-4-6",
+ messages=messages,
+ optional_params=optional_params,
+ litellm_params={},
+ headers={}
+ )
+
+ # Verify context_management is included
+ assert "context_management" in result
+ assert result["context_management"]["edits"][0]["type"] == "compact_20260112"
+
+
+def test_compaction_block_with_other_content_types():
+ """
+ Test that compaction blocks work alongside other content types like thinking blocks and tool calls.
+ """
+ config = AnthropicConfig()
+
+ completion_response = {
+ "content": [
+ {
+ "type": "compaction",
+ "content": "Summary of previous conversation..."
+ },
+ {
+ "type": "thinking",
+ "thinking": "Let me think about this..."
+ },
+ {
+ "type": "text",
+ "text": "Based on my analysis..."
+ },
+ {
+ "type": "tool_use",
+ "id": "toolu_123",
+ "name": "get_weather",
+ "input": {"location": "San Francisco"}
+ }
+ ]
+ }
+
+ text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results, compaction_blocks = config.extract_response_content(
+ completion_response
+ )
+
+ # Verify all content types are extracted
+ assert compaction_blocks is not None
+ assert len(compaction_blocks) == 1
+ assert thinking_blocks is not None
+ assert len(thinking_blocks) == 1
+ assert "Based on my analysis" in text
+ assert len(tool_calls) == 1
+ assert tool_calls[0]["function"]["name"] == "get_weather"
+
+
+def test_compaction_block_empty_list_not_added():
+ """
+ Test that empty compaction_blocks list is not added to provider_specific_fields.
+ """
+ import httpx
+
+ from litellm.types.utils import ModelResponse
+
+ config = AnthropicConfig()
+
+ # Response without compaction blocks
+ completion_response = {
+ "id": "msg_no_compaction",
+ "type": "message",
+ "role": "assistant",
+ "model": "claude-opus-4-6",
+ "content": [
+ {
+ "type": "text",
+ "text": "Just a regular response."
+ }
+ ],
+ "stop_reason": "end_turn",
+ "usage": {
+ "input_tokens": 10,
+ "output_tokens": 5
+ }
+ }
+
+ raw_response = httpx.Response(status_code=200, headers={})
+ model_response = ModelResponse()
+
+ result = config.transform_parsed_response(
+ completion_response=completion_response,
+ raw_response=raw_response,
+ model_response=model_response,
+ json_mode=False,
+ prefix_prompt=None,
+ )
+
+ # Verify compaction_blocks is not in provider_specific_fields when there are none
+ provider_fields = result.choices[0].message.provider_specific_fields
+ if provider_fields:
+ assert "compaction_blocks" not in provider_fields or provider_fields.get("compaction_blocks") is None
diff --git a/tests/test_litellm/llms/azure_ai/claude/test_azure_anthropic_transformation.py b/tests/test_litellm/llms/azure_ai/claude/test_azure_anthropic_transformation.py
index e43a899325f..f0f8a9d91bf 100644
--- a/tests/test_litellm/llms/azure_ai/claude/test_azure_anthropic_transformation.py
+++ b/tests/test_litellm/llms/azure_ai/claude/test_azure_anthropic_transformation.py
@@ -235,3 +235,97 @@ def test_transform_request_removes_unsupported_params(self):
assert result["max_tokens"] == 100
assert "messages" in result
+ def test_context_management_compact_beta_header(self):
+        """Test that context_management with compact is passed through in the Azure AI request body"""
+ config = AzureAnthropicConfig()
+
+ messages = [{"role": "user", "content": "Hello"}]
+ optional_params = {
+ "context_management": {
+ "edits": [
+ {
+ "type": "compact_20260112"
+ }
+ ]
+ },
+ "max_tokens": 100
+ }
+ litellm_params = {"api_key": "test-key"}
+ headers = {"api-key": "test-key"}
+
+ with patch(
+ "litellm.llms.azure.common_utils.BaseAzureLLM._base_validate_azure_environment"
+ ) as mock_validate:
+ mock_validate.return_value = {"api-key": "test-key"}
+ result = config.transform_request(
+ model="claude-opus-4-6",
+ messages=messages,
+ optional_params=optional_params,
+ litellm_params=litellm_params,
+ headers=headers,
+ )
+
+ # Verify context_management is included
+ assert "context_management" in result
+ assert result["context_management"]["edits"][0]["type"] == "compact_20260112"
+
+ def test_context_management_compact_beta_header_in_headers(self):
+ """Test that compact beta header is added to headers for Azure AI"""
+ config = AzureAnthropicConfig()
+
+ messages = [{"role": "user", "content": "Hello"}]
+ optional_params = {
+ "context_management": {
+ "edits": [
+ {
+ "type": "compact_20260112"
+ }
+ ]
+ },
+ "max_tokens": 100
+ }
+
+        # Call the parent's update_headers_with_optional_anthropic_beta directly
+        # and verify that it adds the compact beta header
+ headers = {}
+ headers = config.update_headers_with_optional_anthropic_beta(
+ headers=headers,
+ optional_params=optional_params
+ )
+
+ # Verify compact beta header is present
+ assert "anthropic-beta" in headers
+ assert "compact-2026-01-12" in headers["anthropic-beta"]
+
+ def test_context_management_mixed_edits_beta_headers(self):
+ """Test that context_management with both compact and other edits adds both beta headers"""
+ config = AzureAnthropicConfig()
+
+ messages = [{"role": "user", "content": "Hello"}]
+ optional_params = {
+ "context_management": {
+ "edits": [
+ {
+ "type": "compact_20260112"
+ },
+ {
+ "type": "replace",
+ "message_id": "msg_123",
+ "content": "new content"
+ }
+ ]
+ },
+ "max_tokens": 100
+ }
+
+ headers = {}
+ headers = config.update_headers_with_optional_anthropic_beta(
+ headers=headers,
+ optional_params=optional_params
+ )
+
+ # Verify both beta headers are present
+ assert "anthropic-beta" in headers
+ assert "compact-2026-01-12" in headers["anthropic-beta"]
+ assert "context-management-2025-06-27" in headers["anthropic-beta"]
+
diff --git a/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py b/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py
index 3e6c6f6740c..90ab41aadf6 100644
--- a/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py
+++ b/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py
@@ -6,6 +6,9 @@
sys.path.insert(
0, os.path.abspath("../../../../../..")
) # Adds the parent directory to the system path
+from litellm.anthropic_beta_headers_manager import (
+ update_headers_with_filtered_beta,
+)
from litellm.llms.vertex_ai.vertex_ai_partner_models.anthropic.transformation import (
VertexAIAnthropicConfig,
)
@@ -75,6 +78,76 @@ def test_vertex_ai_anthropic_web_search_header_in_completion():
"anthropic-beta with web-search should not be present for non-Vertex requests"
+def test_vertex_ai_anthropic_context_management_compact_beta_header():
+ """Test that context_management with compact adds the correct beta header for Vertex AI"""
+ config = VertexAIAnthropicConfig()
+
+ messages = [{"role": "user", "content": "Hello"}]
+ optional_params = {
+ "context_management": {
+ "edits": [
+ {
+ "type": "compact_20260112"
+ }
+ ]
+ },
+ "max_tokens": 100,
+ "is_vertex_request": True
+ }
+
+ result = config.transform_request(
+ model="claude-opus-4-6",
+ messages=messages,
+ optional_params=optional_params,
+ litellm_params={},
+ headers={}
+ )
+
+ # Verify context_management is included
+ assert "context_management" in result
+ assert result["context_management"]["edits"][0]["type"] == "compact_20260112"
+
+ # Verify compact beta header is in anthropic_beta field
+ assert "anthropic_beta" in result
+ assert "compact-2026-01-12" in result["anthropic_beta"]
+
+
+def test_vertex_ai_anthropic_context_management_mixed_edits():
+ """Test that context_management with both compact and other edits adds both beta headers"""
+ config = VertexAIAnthropicConfig()
+
+ messages = [{"role": "user", "content": "Hello"}]
+ optional_params = {
+ "context_management": {
+ "edits": [
+ {
+ "type": "compact_20260112"
+ },
+ {
+ "type": "replace",
+ "message_id": "msg_123",
+ "content": "new content"
+ }
+ ]
+ },
+ "max_tokens": 100,
+ "is_vertex_request": True
+ }
+
+ result = config.transform_request(
+ model="claude-opus-4-6",
+ messages=messages,
+ optional_params=optional_params,
+ litellm_params={},
+ headers={}
+ )
+
+ # Verify both beta headers are present
+ assert "anthropic_beta" in result
+ assert "compact-2026-01-12" in result["anthropic_beta"]
+ assert "context-management-2025-06-27" in result["anthropic_beta"]
+
+
def test_vertex_ai_anthropic_structured_output_header_not_added():
"""Test that structured output beta headers are NOT added for Vertex AI requests"""
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
@@ -276,8 +349,7 @@ def test_vertex_ai_partner_models_anthropic_remove_prompt_caching_scope_beta_hea
"anthropic-beta": f"other-feature,{PROMPT_CACHING_BETA_HEADER},web-search-2025-03-05"
}
- config = VertexAIPartnerModelsAnthropicMessagesConfig()
- config.remove_unsupported_beta(headers)
+ headers = update_headers_with_filtered_beta(headers, "vertex_ai")
beta_header = headers.get("anthropic-beta")
assert PROMPT_CACHING_BETA_HEADER not in (beta_header or ""), \
@@ -288,5 +360,5 @@ def test_vertex_ai_partner_models_anthropic_remove_prompt_caching_scope_beta_hea
"Other non-excluded beta headers should remain"
# If prompt-caching was the only value, header should be removed completely
headers2 = {"anthropic-beta": PROMPT_CACHING_BETA_HEADER}
- config.remove_unsupported_beta(headers2)
+ headers2 = update_headers_with_filtered_beta(headers2, "vertex_ai")
assert "anthropic-beta" not in headers2, "Header should be removed if no supported values remain"
\ No newline at end of file
diff --git a/tests/test_litellm/proxy/spend_tracking/test_spend_management_endpoints.py b/tests/test_litellm/proxy/spend_tracking/test_spend_management_endpoints.py
index 13368d0a142..54a276bc97f 100644
--- a/tests/test_litellm/proxy/spend_tracking/test_spend_management_endpoints.py
+++ b/tests/test_litellm/proxy/spend_tracking/test_spend_management_endpoints.py
@@ -205,6 +205,7 @@ def test_can_user_view_spend_log_false_for_other_roles():
"metadata.additional_usage_values.prompt_tokens_details",
"metadata.additional_usage_values.cache_creation_input_tokens",
"metadata.additional_usage_values.cache_read_input_tokens",
+ "metadata.additional_usage_values.inference_geo",
"metadata.litellm_overhead_time_ms",
"metadata.cost_breakdown",
]
diff --git a/tests/test_litellm/test_anthropic_beta_headers_manager.py b/tests/test_litellm/test_anthropic_beta_headers_manager.py
new file mode 100644
index 00000000000..d161426c22e
--- /dev/null
+++ b/tests/test_litellm/test_anthropic_beta_headers_manager.py
@@ -0,0 +1,306 @@
+"""
+Tests for the centralized Anthropic beta headers manager.
+
+Design: JSON config lists UNSUPPORTED headers for each provider.
+Headers not in the unsupported list are passed through.
+Header transformations (e.g., advanced-tool-use -> tool-search-tool) happen in code, not in JSON.
+"""
+
+import pytest
+
+from litellm.anthropic_beta_headers_manager import (
+ filter_and_transform_beta_headers,
+ get_provider_beta_header,
+ get_provider_name,
+ get_unsupported_headers,
+ is_beta_header_supported,
+ update_headers_with_filtered_beta,
+)
+
+
+class TestProviderNameResolution:
+ """Test provider name resolution and aliases."""
+
+ def test_get_provider_name_direct(self):
+ """Test direct provider names."""
+ assert get_provider_name("anthropic") == "anthropic"
+ assert get_provider_name("bedrock") == "bedrock"
+ assert get_provider_name("vertex_ai") == "vertex_ai"
+ assert get_provider_name("azure_ai") == "azure_ai"
+
+ def test_get_provider_name_alias(self):
+        """Test provider name resolution when no alias is defined."""
+ # Note: Aliases are defined in the JSON config
+ # If no alias exists, the original name is returned
+ assert get_provider_name("azure") == "azure" # No alias defined
+ assert get_provider_name("vertex_ai_beta") == "vertex_ai_beta" # No alias defined
+
+
+class TestBetaHeaderSupport:
+ """Test beta header support checks (unsupported list approach)."""
+
+ def test_anthropic_supports_all_headers(self):
+ """Anthropic should support all beta headers (empty unsupported list)."""
+ headers = [
+ "web-fetch-2025-09-10",
+ "web-search-2025-03-05",
+ "context-management-2025-06-27",
+ "compact-2026-01-12",
+ "structured-outputs-2025-11-13",
+ "advanced-tool-use-2025-11-20",
+ ]
+ for header in headers:
+ assert is_beta_header_supported(header, "anthropic")
+
+ def test_bedrock_unsupported_headers(self):
+ """Bedrock should block specific headers."""
+ # Not supported (in unsupported list)
+ assert not is_beta_header_supported("advanced-tool-use-2025-11-20", "bedrock")
+ assert not is_beta_header_supported(
+ "prompt-caching-scope-2026-01-05", "bedrock"
+ )
+ assert not is_beta_header_supported("structured-outputs-2025-11-13", "bedrock")
+
+ # Supported (not in unsupported list)
+ assert is_beta_header_supported("context-management-2025-06-27", "bedrock")
+ assert is_beta_header_supported("effort-2025-11-24", "bedrock")
+ assert is_beta_header_supported("tool-examples-2025-10-29", "bedrock")
+
+ def test_vertex_ai_unsupported_headers(self):
+ """Vertex AI should block specific headers."""
+ # Not supported (in unsupported list)
+ assert not is_beta_header_supported(
+ "prompt-caching-scope-2026-01-05", "vertex_ai"
+ )
+
+ # Supported (not in unsupported list)
+ assert is_beta_header_supported("web-search-2025-03-05", "vertex_ai")
+ assert is_beta_header_supported("context-management-2025-06-27", "vertex_ai")
+ assert is_beta_header_supported("effort-2025-11-24", "vertex_ai")
+ assert is_beta_header_supported("advanced-tool-use-2025-11-20", "vertex_ai")
+
+
+class TestBetaHeaderTransformation:
+ """Test beta header support checking (transformations happen in code, not here)."""
+
+ def test_anthropic_no_transformation(self):
+ """Anthropic headers should pass through (empty unsupported list)."""
+ header = "advanced-tool-use-2025-11-20"
+ assert get_provider_beta_header(header, "anthropic") == header
+
+ def test_bedrock_unsupported_returns_none(self):
+ """Bedrock should return None for unsupported headers."""
+ header = "advanced-tool-use-2025-11-20"
+ # This header is in bedrock's unsupported list
+ assert get_provider_beta_header(header, "bedrock") is None
+
+ def test_vertex_ai_supported_returns_original(self):
+ """Vertex AI should return original for supported headers."""
+ header = "advanced-tool-use-2025-11-20"
+ # This header is NOT in vertex_ai's unsupported list
+ assert get_provider_beta_header(header, "vertex_ai") == header
+
+ def test_unsupported_header_returns_none(self):
+ """Unsupported headers (in unsupported list) should return None."""
+ header = "prompt-caching-scope-2026-01-05"
+ assert get_provider_beta_header(header, "bedrock") is None
+
+ def test_supported_header_returns_original(self):
+ """Supported headers (not in unsupported list) should return original."""
+ header = "context-management-2025-06-27"
+ assert get_provider_beta_header(header, "bedrock") == header
+
+
+class TestFilterAndTransformBetaHeaders:
+ """Test the main filtering and transformation function."""
+
+ def test_anthropic_keeps_all_headers(self):
+ """Anthropic should keep all headers (empty unsupported list)."""
+ headers = [
+ "web-fetch-2025-09-10",
+ "context-management-2025-06-27",
+ "structured-outputs-2025-11-13",
+ "some-new-future-header-2026-01-01", # Even unknown headers pass through
+ ]
+ result = filter_and_transform_beta_headers(headers, "anthropic")
+ assert set(result) == set(headers)
+
+ def test_bedrock_filters_unsupported(self):
+ """Bedrock should filter out headers in unsupported list."""
+ headers = [
+ "context-management-2025-06-27", # Not in unsupported list -> kept
+ "advanced-tool-use-2025-11-20", # In unsupported list -> dropped
+ "structured-outputs-2025-11-13", # In unsupported list -> dropped
+ "prompt-caching-scope-2026-01-05", # In unsupported list -> dropped
+ ]
+ result = filter_and_transform_beta_headers(headers, "bedrock")
+ assert "context-management-2025-06-27" in result
+ assert "advanced-tool-use-2025-11-20" not in result
+ assert "structured-outputs-2025-11-13" not in result
+ assert "prompt-caching-scope-2026-01-05" not in result
+
+ def test_bedrock_no_transformations_in_filter(self):
+ """Bedrock filtering doesn't do transformations (those happen in code)."""
+ headers = ["advanced-tool-use-2025-11-20"]
+ result = filter_and_transform_beta_headers(headers, "bedrock")
+ # advanced-tool-use is in unsupported list, so it gets dropped
+ assert result == []
+
+ def test_vertex_ai_filters_unsupported(self):
+ """Vertex AI should filter unsupported headers."""
+ headers = [
+ "web-search-2025-03-05", # Not in unsupported list -> kept
+ "advanced-tool-use-2025-11-20", # Not in unsupported list -> kept
+ "prompt-caching-scope-2026-01-05", # In unsupported list -> dropped
+ ]
+ result = filter_and_transform_beta_headers(headers, "vertex_ai")
+ assert "web-search-2025-03-05" in result
+ assert "advanced-tool-use-2025-11-20" in result # Kept as-is, transformation happens in code
+ assert "prompt-caching-scope-2026-01-05" not in result
+
+ def test_empty_list_returns_empty(self):
+ """Empty list should return empty list."""
+ result = filter_and_transform_beta_headers([], "anthropic")
+ assert result == []
+
+ def test_bedrock_converse_more_restrictive(self):
+ """Bedrock Converse should be more restrictive than Bedrock."""
+ headers = [
+ "context-management-2025-06-27",
+ "advanced-tool-use-2025-11-20",
+ "tool-examples-2025-10-29",
+ ]
+
+ bedrock_result = filter_and_transform_beta_headers(headers, "bedrock")
+ converse_result = filter_and_transform_beta_headers(headers, "bedrock_converse")
+
+ # Bedrock Converse has more restrictions
+ # advanced-tool-use is in both unsupported lists
+ assert "advanced-tool-use-2025-11-20" not in bedrock_result
+ assert "advanced-tool-use-2025-11-20" not in converse_result
+
+        # tool-examples is NOT in bedrock's unsupported list,
+        # so it is kept for plain bedrock requests
+        # (transformations, if any, happen in code — not in this filter)
+        assert "tool-examples-2025-10-29" in bedrock_result
+        # tool-examples is also absent from bedrock_converse's unsupported
+        # list, so it passes through; verify the shared supported header below
+ assert "context-management-2025-06-27" in bedrock_result
+ assert "context-management-2025-06-27" in converse_result
+
+ def test_unknown_future_headers_pass_through(self):
+ """Headers not in unsupported list should pass through (future-proof)."""
+ headers = ["some-new-beta-2026-05-01", "another-feature-2026-06-01"]
+ result = filter_and_transform_beta_headers(headers, "anthropic")
+ assert set(result) == set(headers)
+
+
+class TestUpdateHeadersWithFilteredBeta:
+ """Test the headers update function."""
+
+ def test_update_headers_anthropic(self):
+ """Test updating headers for Anthropic."""
+ headers = {
+ "anthropic-beta": "web-fetch-2025-09-10,context-management-2025-06-27"
+ }
+ result = update_headers_with_filtered_beta(headers, "anthropic")
+ assert "anthropic-beta" in result
+ beta_values = set(result["anthropic-beta"].split(","))
+ assert "web-fetch-2025-09-10" in beta_values
+ assert "context-management-2025-06-27" in beta_values
+
+ def test_update_headers_bedrock_filters(self):
+ """Test updating headers for Bedrock with filtering."""
+ headers = {
+ "anthropic-beta": "context-management-2025-06-27,advanced-tool-use-2025-11-20"
+ }
+ result = update_headers_with_filtered_beta(headers, "bedrock")
+ assert "anthropic-beta" in result
+ assert "context-management-2025-06-27" in result["anthropic-beta"]
+ assert "advanced-tool-use-2025-11-20" not in result["anthropic-beta"]
+
+ def test_update_headers_bedrock_no_transformations(self):
+ """Test that filtering doesn't do transformations (those happen in code)."""
+ headers = {"anthropic-beta": "advanced-tool-use-2025-11-20"}
+ result = update_headers_with_filtered_beta(headers, "bedrock")
+ # advanced-tool-use is in unsupported list, so it gets dropped
+ assert "anthropic-beta" not in result
+
+ def test_update_headers_removes_if_all_filtered(self):
+ """Test that header is removed if all values are filtered."""
+ headers = {"anthropic-beta": "advanced-tool-use-2025-11-20,prompt-caching-scope-2026-01-05"}
+ result = update_headers_with_filtered_beta(headers, "bedrock")
+ assert "anthropic-beta" not in result
+
+ def test_update_headers_no_beta_header(self):
+ """Test updating headers when no beta header exists."""
+ headers = {"content-type": "application/json"}
+ result = update_headers_with_filtered_beta(headers, "anthropic")
+ assert "anthropic-beta" not in result
+ assert headers == result
+
+
+class TestGetUnsupportedHeaders:
+ """Test getting unsupported headers for a provider."""
+
+ def test_anthropic_has_no_unsupported(self):
+ """Anthropic should have no unsupported headers (empty list)."""
+ anthropic_unsupported = get_unsupported_headers("anthropic")
+ assert len(anthropic_unsupported) == 0
+
+ def test_bedrock_converse_most_restrictive(self):
+ """Bedrock Converse should have more unsupported headers than Bedrock."""
+ bedrock_unsupported = get_unsupported_headers("bedrock")
+ converse_unsupported = get_unsupported_headers("bedrock_converse")
+ # Converse has more restrictions
+ assert len(converse_unsupported) >= len(bedrock_unsupported)
+
+ def test_all_providers_have_config(self):
+ """All providers should have a configuration entry."""
+ providers = ["anthropic", "azure_ai", "bedrock", "bedrock_converse", "vertex_ai"]
+ for provider in providers:
+ unsupported = get_unsupported_headers(provider)
+ # Should return a list (even if empty)
+ assert isinstance(unsupported, list), f"Provider {provider} should return a list"
+
+
+class TestEdgeCases:
+ """Test edge cases and error handling."""
+
+ def test_unknown_provider(self):
+ """Unknown provider with no config should pass through all headers."""
+ result = filter_and_transform_beta_headers(
+ ["context-management-2025-06-27"], "unknown_provider"
+ )
+ # Unknown providers have no unsupported list, so headers pass through
+ assert "context-management-2025-06-27" in result
+
+ def test_whitespace_handling(self):
+ """Headers with whitespace should be handled correctly."""
+ headers = [
+ " context-management-2025-06-27 ",
+ " web-search-2025-03-05 ",
+ ]
+ result = filter_and_transform_beta_headers(headers, "anthropic")
+ assert len(result) == 2
+
+ def test_duplicate_headers(self):
+ """Duplicate headers should be deduplicated."""
+ headers = [
+ "context-management-2025-06-27",
+ "context-management-2025-06-27",
+ ]
+ result = filter_and_transform_beta_headers(headers, "anthropic")
+ assert len(result) == 1
+
+ def test_case_sensitivity(self):
+ """Headers should be case-sensitive."""
+ # Correct case - should pass through for anthropic (no unsupported list)
+ headers = ["context-management-2025-06-27"]
+ result = filter_and_transform_beta_headers(headers, "anthropic")
+ assert len(result) == 1
+
+ # Wrong case - should still pass through (not in unsupported list)
+ headers = ["Context-Management-2025-06-27"]
+ result = filter_and_transform_beta_headers(headers, "anthropic")
+ assert len(result) == 1 # Passes through because anthropic has empty unsupported list
diff --git a/tests/test_litellm/test_claude_opus_4_6_config.py b/tests/test_litellm/test_claude_opus_4_6_config.py
index 8c00ad4b36f..071d0a26369 100644
--- a/tests/test_litellm/test_claude_opus_4_6_config.py
+++ b/tests/test_litellm/test_claude_opus_4_6_config.py
@@ -26,12 +26,6 @@ def test_opus_4_6_model_pricing_and_capabilities():
"tool_use_system_prompt_tokens": 346,
"max_input_tokens": 1000000,
},
- "anthropic.claude-opus-4-6-v1:0": {
- "provider": "bedrock_converse",
- "has_long_context_pricing": True,
- "tool_use_system_prompt_tokens": 346,
- "max_input_tokens": 1000000,
- },
"anthropic.claude-opus-4-6-v1": {
"provider": "bedrock_converse",
"has_long_context_pricing": True,
@@ -88,16 +82,6 @@ def test_opus_4_6_bedrock_regional_model_pricing():
model_data = json.load(f)
expected_models = {
- "global.anthropic.claude-opus-4-6-v1:0": {
- "input_cost_per_token": 5e-06,
- "output_cost_per_token": 2.5e-05,
- "cache_creation_input_token_cost": 6.25e-06,
- "cache_read_input_token_cost": 5e-07,
- "input_cost_per_token_above_200k_tokens": 1e-05,
- "output_cost_per_token_above_200k_tokens": 3.75e-05,
- "cache_creation_input_token_cost_above_200k_tokens": 1.25e-05,
- "cache_read_input_token_cost_above_200k_tokens": 1e-06,
- },
"global.anthropic.claude-opus-4-6-v1": {
"input_cost_per_token": 5e-06,
"output_cost_per_token": 2.5e-05,
@@ -108,16 +92,6 @@ def test_opus_4_6_bedrock_regional_model_pricing():
"cache_creation_input_token_cost_above_200k_tokens": 1.25e-05,
"cache_read_input_token_cost_above_200k_tokens": 1e-06,
},
- "us.anthropic.claude-opus-4-6-v1:0": {
- "input_cost_per_token": 5.5e-06,
- "output_cost_per_token": 2.75e-05,
- "cache_creation_input_token_cost": 6.875e-06,
- "cache_read_input_token_cost": 5.5e-07,
- "input_cost_per_token_above_200k_tokens": 1.1e-05,
- "output_cost_per_token_above_200k_tokens": 4.125e-05,
- "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05,
- "cache_read_input_token_cost_above_200k_tokens": 1.1e-06,
- },
"us.anthropic.claude-opus-4-6-v1": {
"input_cost_per_token": 5.5e-06,
"output_cost_per_token": 2.75e-05,
@@ -128,16 +102,6 @@ def test_opus_4_6_bedrock_regional_model_pricing():
"cache_creation_input_token_cost_above_200k_tokens": 1.375e-05,
"cache_read_input_token_cost_above_200k_tokens": 1.1e-06,
},
- "eu.anthropic.claude-opus-4-6-v1:0": {
- "input_cost_per_token": 5.5e-06,
- "output_cost_per_token": 2.75e-05,
- "cache_creation_input_token_cost": 6.875e-06,
- "cache_read_input_token_cost": 5.5e-07,
- "input_cost_per_token_above_200k_tokens": 1.1e-05,
- "output_cost_per_token_above_200k_tokens": 4.125e-05,
- "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05,
- "cache_read_input_token_cost_above_200k_tokens": 1.1e-06,
- },
"eu.anthropic.claude-opus-4-6-v1": {
"input_cost_per_token": 5.5e-06,
"output_cost_per_token": 2.75e-05,
@@ -148,7 +112,7 @@ def test_opus_4_6_bedrock_regional_model_pricing():
"cache_creation_input_token_cost_above_200k_tokens": 1.375e-05,
"cache_read_input_token_cost_above_200k_tokens": 1.1e-06,
},
- "apac.anthropic.claude-opus-4-6-v1:0": {
+ "apac.anthropic.claude-opus-4-6-v1": {
"input_cost_per_token": 5.5e-06,
"output_cost_per_token": 2.75e-05,
"cache_creation_input_token_cost": 6.875e-06,
@@ -212,14 +176,8 @@ def test_opus_4_6_alias_and_dated_metadata_match():
def test_opus_4_6_bedrock_converse_registration():
- assert "anthropic.claude-opus-4-6-v1:0" in litellm.BEDROCK_CONVERSE_MODELS
assert "anthropic.claude-opus-4-6-v1" in litellm.BEDROCK_CONVERSE_MODELS
- assert "anthropic.claude-opus-4-6-v1" in litellm.bedrock_converse_models
- assert "global.anthropic.claude-opus-4-6-v1:0" in litellm.bedrock_converse_models
assert "global.anthropic.claude-opus-4-6-v1" in litellm.bedrock_converse_models
- assert "us.anthropic.claude-opus-4-6-v1:0" in litellm.bedrock_converse_models
assert "us.anthropic.claude-opus-4-6-v1" in litellm.bedrock_converse_models
- assert "eu.anthropic.claude-opus-4-6-v1:0" in litellm.bedrock_converse_models
assert "eu.anthropic.claude-opus-4-6-v1" in litellm.bedrock_converse_models
- assert "apac.anthropic.claude-opus-4-6-v1:0" in litellm.bedrock_converse_models
assert "apac.anthropic.claude-opus-4-6-v1" in litellm.bedrock_converse_models