diff --git a/docs/my-website/blog/claude_opus_4_6/index.md b/docs/my-website/blog/claude_opus_4_6/index.md index b1836cfaef5..75b088c533d 100644 --- a/docs/my-website/blog/claude_opus_4_6/index.md +++ b/docs/my-website/blog/claude_opus_4_6/index.md @@ -3,6 +3,10 @@ slug: claude_opus_4_6 title: "Day 0 Support: Claude Opus 4.6" date: 2026-02-05T10:00:00 authors: + - name: Sameer Kankute + title: SWE @ LiteLLM (LLM Translation) + url: https://www.linkedin.com/in/sameer-kankute/ + image_url: https://pbs.twimg.com/profile_images/2001352686994907136/ONgNuSk5_400x400.jpg - name: Ishaan Jaff title: "CTO, LiteLLM" url: https://www.linkedin.com/in/reffajnaahsi/ @@ -219,6 +223,156 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \ -## More Features Coming Soon +## Compaction + +Litellm supports enabling compaction for the new claude-opus-4-6. + +### Enabling Compaction + +To enable compaction, add the `context_management` parameter with the `compact_20260112` edit type: + +```bash +curl --location 'http://0.0.0.0:4000/chat/completions' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer $LITELLM_KEY' \ +--data '{ + "model": "claude-opus-4-6", + "messages": [ + { + "role": "user", + "content": "What is the weather in San Francisco?" + } + ], + "context_management": { + "edits": [ + { + "type": "compact_20260112" + } + ] + }, + "max_tokens": 100 +}' +``` +All the parameters supported for context_management by anthropic are supported and can be directly added. Litellm automatically adds the `compact-2026-01-12` beta header in the request. 
+ + +### Response with Compaction Block + +The response will include the compaction summary in `provider_specific_fields.compaction_blocks`: + +```json +{ + "id": "chatcmpl-a6c105a3-4b25-419e-9551-c800633b6cb2", + "created": 1770357619, + "model": "claude-opus-4-6", + "object": "chat.completion", + "choices": [ + { + "finish_reason": "length", + "index": 0, + "message": { + "content": "I don't have access to real-time data, so I can't provide the current weather in San Francisco. To get up-to-date weather information, I'd recommend checking:\n\n- **Weather websites** like weather.com, accuweather.com, or wunderground.com\n- **Search engines** – just Google \"San Francisco weather\"\n- **Weather apps** on your phone (e.g., Apple Weather, Google Weather)\n- **National", + "role": "assistant", + "provider_specific_fields": { + "compaction_blocks": [ + { + "type": "compaction", + "content": "Summary of the conversation: The user requested help building a web scraper..." + } + ] + } + } + } + ], + "usage": { + "completion_tokens": 100, + "prompt_tokens": 86, + "total_tokens": 186 + } +} +``` + +### Using Compaction Blocks in Follow-up Requests + +To continue the conversation with compaction, include the compaction block in the assistant message's `provider_specific_fields`: + +```bash +curl --location 'http://0.0.0.0:4000/chat/completions' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer $LITELLM_KEY' \ +--data '{ + "model": "claude-opus-4-6", + "messages": [ + { + "role": "user", + "content": "How can I build a web scraper?" + }, + { + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Certainly! To build a basic web scraper, you'll typically use a programming language like Python along with libraries such as `requests` (for fetching web pages) and `BeautifulSoup` (for parsing HTML). 
Here's a basic example:\n\n```python\nimport requests\nfrom bs4 import BeautifulSoup\n\nurl = 'https://example.com'\nresponse = requests.get(url)\nsoup = BeautifulSoup(response.text, 'html.parser')\n\n# Extract and print all text\ntext = soup.get_text()\nprint(text)\n```\n\nLet me know what you're interested in scraping or if you need help with a specific website!" + } + ], + "provider_specific_fields": { + "compaction_blocks": [ + { + "type": "compaction", + "content": "Summary of the conversation: The user asked how to build a web scraper, and the assistant gave an overview using Python with requests and BeautifulSoup." + } + ] + } + }, + { + "role": "user", + "content": "How do I use it to scrape product prices?" + } + ], + "context_management": { + "edits": [ + { + "type": "compact_20260112" + } + ] + }, + "max_tokens": 100 +}' +``` + +### Streaming Support + +Compaction blocks are also supported in streaming mode. You'll receive: +- `compaction_start` event when a compaction block begins +- `compaction_delta` events with the compaction content +- The accumulated `compaction_blocks` in `provider_specific_fields` + + +## Effort Levels + +Four effort levels available: `low`, `medium`, `high` (default), and `max`. Pass directly via the `effort` parameter: + +```bash +curl --location 'http://0.0.0.0:4000/chat/completions' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer $LITELLM_KEY' \ +--data '{ + "model": "claude-opus-4-6", + "messages": [ + { + "role": "user", + "content": "Explain quantum computing" + } + ], + "effort": "max" +}' +``` + +## 1M Token Context (Beta) + +Opus 4.6 supports 1M token context. Premium pricing applies for prompts exceeding 200k tokens ($10/$37.50 per million input/output tokens). LiteLLM supports cost calculations for 1M token contexts. + +## US-Only Inference + +Available at 1.1× token pricing. LiteLLM supports this pricing model. -We're actively working on supporting new features for Claude Opus 4.6. 
Stay tuned for updates! diff --git a/docs/my-website/docs/tutorials/claude_code_beta_headers.md b/docs/my-website/docs/tutorials/claude_code_beta_headers.md new file mode 100644 index 00000000000..9c1645e0277 --- /dev/null +++ b/docs/my-website/docs/tutorials/claude_code_beta_headers.md @@ -0,0 +1,129 @@ +import Image from '@theme/IdealImage'; + +# Claude Code - Fixing Invalid Beta Header Errors + +When using Claude Code with LiteLLM and non-Anthropic providers (Bedrock, Azure AI, Vertex AI), you may encounter "invalid beta header" errors. This guide explains how to fix these errors locally or contribute a fix to LiteLLM. + +## What Are Beta Headers? + +Anthropic uses beta headers to enable experimental features in Claude. When you use Claude Code, it may send beta headers like: + +``` +anthropic-beta: prompt-caching-scope-2026-01-05,advanced-tool-use-2025-11-20 +``` + +However, not all providers support all Anthropic beta features. When an unsupported beta header is sent to a provider, you'll see an error. + +## Common Error Message + +```bash +Error: The model returned the following errors: invalid beta flag +``` + +## How LiteLLM Handles Beta Headers + +LiteLLM automatically filters out unsupported beta headers using a configuration file: + +``` +litellm/litellm/anthropic_beta_headers_config.json +``` + +This JSON file lists which beta headers are **unsupported** for each provider. Headers not in the unsupported list are passed through to the provider. + +## Quick Fix: Update Config Locally + +If you encounter an invalid beta header error, you can fix it immediately by updating the config file locally. 
+ +### Step 1: Locate the Config File + +Find the file in your LiteLLM installation: + +```bash +# If installed via pip +cd $(python -c "import litellm; import os; print(os.path.dirname(litellm.__file__))") + +# The config file is at: +# litellm/anthropic_beta_headers_config.json +``` + +### Step 2: Add the Unsupported Header + +Open `anthropic_beta_headers_config.json` and add the problematic header to the appropriate provider's list: + +```json title="anthropic_beta_headers_config.json" +{ + "description": "Unsupported Anthropic beta headers for each provider. Headers listed here will be dropped. Headers not listed are passed through as-is.", + "anthropic": [], + "azure_ai": [], + "bedrock_converse": [ + "prompt-caching-scope-2026-01-05", + "bash_20250124", + "bash_20241022", + "text_editor_20250124", + "text_editor_20241022", + "compact-2026-01-12", + "advanced-tool-use-2025-11-20", + "web-fetch-2025-09-10", + "code-execution-2025-08-25", + "skills-2025-10-02", + "files-api-2025-04-14" + ], + "bedrock": [ + "advanced-tool-use-2025-11-20", + "prompt-caching-scope-2026-01-05", + "structured-outputs-2025-11-13", + "web-fetch-2025-09-10", + "code-execution-2025-08-25", + "skills-2025-10-02", + "files-api-2025-04-14" + ], + "vertex_ai": [ + "prompt-caching-scope-2026-01-05" + ] +} +``` + +### Step 3: Restart Your Application + +After updating the config file, restart your LiteLLM proxy or application: + +```bash +# If using LiteLLM proxy +litellm --config config.yaml + +# If using Python SDK +# Just restart your Python application +``` + +The updated configuration will be loaded automatically. + +## Contributing a Fix to LiteLLM + +Help the community by contributing your fix! If your local changes work, please raise a PR with the addition of the header and we will merge it. 
+ + +## How Beta Header Filtering Works + +When you make a request through LiteLLM: + +```mermaid +sequenceDiagram + participant CC as Claude Code + participant LP as LiteLLM + participant Config as Beta Headers Config + participant Provider as Provider (Bedrock/Azure/etc) + + CC->>LP: Request with beta headers + Note over CC,LP: anthropic-beta: header1,header2,header3 + + LP->>Config: Load unsupported headers for provider + Config-->>LP: Returns unsupported list + + Note over LP: Filter headers:
- Remove unsupported
- Keep supported + + LP->>Provider: Request with filtered headers + Note over LP,Provider: anthropic-beta: header2
(header1, header3 removed) + + Provider-->>LP: Success response + LP-->>CC: Response +``` \ No newline at end of file diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index fda0e3be4e4..688ad714370 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -129,6 +129,7 @@ const sidebars = { "tutorials/claude_mcp", "tutorials/claude_non_anthropic_models", "tutorials/claude_code_plugin_marketplace", + "tutorials/claude_code_beta_headers", ] }, "tutorials/opencode_integration", diff --git a/litellm/anthropic_beta_headers_config.json b/litellm/anthropic_beta_headers_config.json new file mode 100644 index 00000000000..193091c0176 --- /dev/null +++ b/litellm/anthropic_beta_headers_config.json @@ -0,0 +1,30 @@ +{ + "description": "Unsupported Anthropic beta headers for each provider. Headers listed here will be dropped. Headers not listed are passed through as-is.", + "anthropic": [], + "azure_ai": [], + "bedrock_converse": [ + "prompt-caching-scope-2026-01-05", + "bash_20250124", + "bash_20241022", + "text_editor_20250124", + "text_editor_20241022", + "compact-2026-01-12", + "advanced-tool-use-2025-11-20", + "web-fetch-2025-09-10", + "code-execution-2025-08-25", + "skills-2025-10-02", + "files-api-2025-04-14" + ], + "bedrock": [ + "advanced-tool-use-2025-11-20", + "prompt-caching-scope-2026-01-05", + "structured-outputs-2025-11-13", + "web-fetch-2025-09-10", + "code-execution-2025-08-25", + "skills-2025-10-02", + "files-api-2025-04-14" + ], + "vertex_ai": [ + "prompt-caching-scope-2026-01-05" + ] +} diff --git a/litellm/anthropic_beta_headers_manager.py b/litellm/anthropic_beta_headers_manager.py new file mode 100644 index 00000000000..2643f4c03fa --- /dev/null +++ b/litellm/anthropic_beta_headers_manager.py @@ -0,0 +1,221 @@ +""" +Centralized manager for Anthropic beta headers across different providers. + +This module provides utilities to: +1. 
Load beta header configuration from JSON (lists unsupported headers per provider) +2. Filter out unsupported beta headers +3. Handle provider-specific header name mappings (e.g., advanced-tool-use -> tool-search-tool) + +Design: +- JSON config lists UNSUPPORTED headers for each provider +- Headers not in the unsupported list are passed through +- Header mappings allow renaming headers for specific providers +""" + +import json +import os +from typing import Dict, List, Optional, Set + +from litellm.litellm_core_utils.litellm_logging import verbose_logger + +# Cache for the loaded configuration +_BETA_HEADERS_CONFIG: Optional[Dict] = None + + +def _load_beta_headers_config() -> Dict: + """ + Load the beta headers configuration from JSON file. + Uses caching to avoid repeated file reads. + + Returns: + Dict containing the beta headers configuration + """ + global _BETA_HEADERS_CONFIG + + if _BETA_HEADERS_CONFIG is not None: + return _BETA_HEADERS_CONFIG + + config_path = os.path.join( + os.path.dirname(__file__), + "anthropic_beta_headers_config.json" + ) + + try: + with open(config_path, "r") as f: + _BETA_HEADERS_CONFIG = json.load(f) + verbose_logger.debug(f"Loaded beta headers config from {config_path}") + return _BETA_HEADERS_CONFIG + except Exception as e: + verbose_logger.error(f"Failed to load beta headers config: {e}") + # Return empty config as fallback + return { + "anthropic": [], + "azure_ai": [], + "bedrock": [], + "bedrock_converse": [], + "vertex_ai": [] + } + + +def get_provider_name(provider: str) -> str: + """ + Resolve provider aliases to canonical provider names. 
+ + Args: + provider: Provider name (may be an alias) + + Returns: + Canonical provider name + """ + config = _load_beta_headers_config() + aliases = config.get("provider_aliases", {}) + return aliases.get(provider, provider) + + +def filter_and_transform_beta_headers( + beta_headers: List[str], + provider: str, +) -> List[str]: + """ + Filter beta headers based on provider's unsupported list. + + This function: + 1. Removes headers that are in the provider's unsupported list + 2. Passes through all other headers as-is + + Note: Header transformations/mappings (e.g., advanced-tool-use -> tool-search-tool) + are handled in each provider's transformation code, not here. + + Args: + beta_headers: List of Anthropic beta header values + provider: Provider name (e.g., "anthropic", "bedrock", "vertex_ai") + + Returns: + List of filtered beta headers for the provider + """ + if not beta_headers: + return [] + + config = _load_beta_headers_config() + provider = get_provider_name(provider) + + # Get unsupported headers for this provider + unsupported_headers = set(config.get(provider, [])) + + filtered_headers: Set[str] = set() + + for header in beta_headers: + header = header.strip() + + # Skip if header is unsupported + if header in unsupported_headers: + verbose_logger.debug( + f"Dropping unsupported beta header '{header}' for provider '{provider}'" + ) + continue + + # Pass through as-is + filtered_headers.add(header) + + return sorted(list(filtered_headers)) + + +def is_beta_header_supported( + beta_header: str, + provider: str, +) -> bool: + """ + Check if a specific beta header is supported by a provider. 
+ + Args: + beta_header: The Anthropic beta header value + provider: Provider name + + Returns: + True if the header is supported (not in unsupported list), False otherwise + """ + config = _load_beta_headers_config() + provider = get_provider_name(provider) + unsupported_headers = set(config.get(provider, [])) + return beta_header not in unsupported_headers + + +def get_provider_beta_header( + anthropic_beta_header: str, + provider: str, +) -> Optional[str]: + """ + Check if a beta header is supported by a provider. + + Note: This does NOT handle header transformations/mappings. + Those are handled in each provider's transformation code. + + Args: + anthropic_beta_header: The Anthropic beta header value + provider: Provider name + + Returns: + The original header if supported, or None if unsupported + """ + config = _load_beta_headers_config() + provider = get_provider_name(provider) + + # Check if unsupported + unsupported_headers = set(config.get(provider, [])) + if anthropic_beta_header in unsupported_headers: + return None + + return anthropic_beta_header + + +def update_headers_with_filtered_beta( + headers: dict, + provider: str, +) -> dict: + """ + Update headers dict by filtering and transforming anthropic-beta header values. + Modifies the headers dict in place and returns it. 
+ + Args: + headers: Request headers dict (will be modified in place) + provider: Provider name + + Returns: + Updated headers dict + """ + existing_beta = headers.get("anthropic-beta") + if not existing_beta: + return headers + + # Parse existing beta headers + beta_values = [b.strip() for b in existing_beta.split(",") if b.strip()] + + # Filter and transform based on provider + filtered_beta_values = filter_and_transform_beta_headers( + beta_headers=beta_values, + provider=provider, + ) + + # Update or remove the header + if filtered_beta_values: + headers["anthropic-beta"] = ",".join(filtered_beta_values) + else: + # Remove the header if no values remain + headers.pop("anthropic-beta", None) + + return headers + + +def get_unsupported_headers(provider: str) -> List[str]: + """ + Get all beta headers that are unsupported by a provider. + + Args: + provider: Provider name + + Returns: + List of unsupported Anthropic beta header names + """ + config = _load_beta_headers_config() + provider = get_provider_name(provider) + return config.get(provider, []) diff --git a/litellm/litellm_core_utils/core_helpers.py b/litellm/litellm_core_utils/core_helpers.py index 00695cbfb5b..7c8e2ebeaff 100644 --- a/litellm/litellm_core_utils/core_helpers.py +++ b/litellm/litellm_core_utils/core_helpers.py @@ -94,8 +94,8 @@ def map_finish_reason( return "length" elif finish_reason == "tool_use": # anthropic return "tool_calls" - elif finish_reason == "content_filtered": - return "content_filter" + elif finish_reason == "compaction": + return "length" return finish_reason diff --git a/litellm/litellm_core_utils/prompt_templates/factory.py b/litellm/litellm_core_utils/prompt_templates/factory.py index 53d2ca2f23f..f9ecd78ff1c 100644 --- a/litellm/litellm_core_utils/prompt_templates/factory.py +++ b/litellm/litellm_core_utils/prompt_templates/factory.py @@ -2190,6 +2190,16 @@ def anthropic_messages_pt( # noqa: PLR0915 while msg_i < len(messages) and messages[msg_i]["role"] == "assistant": 
assistant_content_block: ChatCompletionAssistantMessage = messages[msg_i] # type: ignore + # Extract compaction_blocks from provider_specific_fields and add them first + _provider_specific_fields_raw = assistant_content_block.get( + "provider_specific_fields" + ) + if isinstance(_provider_specific_fields_raw, dict): + _compaction_blocks = _provider_specific_fields_raw.get("compaction_blocks") + if _compaction_blocks and isinstance(_compaction_blocks, list): + # Add compaction blocks at the beginning of assistant content : https://platform.claude.com/docs/en/build-with-claude/compaction + assistant_content.extend(_compaction_blocks) # type: ignore + thinking_blocks = assistant_content_block.get("thinking_blocks", None) if ( thinking_blocks is not None diff --git a/litellm/llms/anthropic/chat/handler.py b/litellm/llms/anthropic/chat/handler.py index 6a9aafd076b..485e95d6489 100644 --- a/litellm/llms/anthropic/chat/handler.py +++ b/litellm/llms/anthropic/chat/handler.py @@ -512,6 +512,9 @@ def __init__( # Accumulate web_search_tool_result blocks for multi-turn reconstruction # See: https://github.com/BerriAI/litellm/issues/17737 self.web_search_results: List[Dict[str, Any]] = [] + + # Accumulate compaction blocks for multi-turn reconstruction + self.compaction_blocks: List[Dict[str, Any]] = [] def check_empty_tool_call_args(self) -> bool: """ @@ -592,6 +595,12 @@ def _content_block_delta_helper(self, chunk: dict) -> Tuple[ ) ] provider_specific_fields["thinking_blocks"] = thinking_blocks + elif "content" in content_block["delta"] and content_block["delta"].get("type") == "compaction_delta": + # Handle compaction delta + provider_specific_fields["compaction_delta"] = { + "type": "compaction_delta", + "content": content_block["delta"]["content"] + } return text, tool_use, thinking_blocks, provider_specific_fields @@ -721,6 +730,20 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream: # noqa: PLR0915 provider_specific_fields=provider_specific_fields, ) + elif 
content_block_start["content_block"]["type"] == "compaction": + # Handle compaction blocks + # The full content comes in content_block_start + self.compaction_blocks.append( + content_block_start["content_block"] + ) + provider_specific_fields["compaction_blocks"] = ( + self.compaction_blocks + ) + provider_specific_fields["compaction_start"] = { + "type": "compaction", + "content": content_block_start["content_block"].get("content", "") + } + elif content_block_start["content_block"]["type"].endswith("_tool_result"): # Handle all tool result types (web_search, bash_code_execution, text_editor, etc.) content_type = content_block_start["content_block"]["type"] diff --git a/litellm/llms/anthropic/chat/transformation.py b/litellm/llms/anthropic/chat/transformation.py index 1b61b533275..02b8d952445 100644 --- a/litellm/llms/anthropic/chat/transformation.py +++ b/litellm/llms/anthropic/chat/transformation.py @@ -170,9 +170,10 @@ def convert_tool_use_to_openai_format( tool_call["caller"] = cast(Dict[str, Any], anthropic_tool_content["caller"]) # type: ignore[typeddict-item] return tool_call - def _is_claude_opus_4_5(self, model: str) -> bool: + @staticmethod + def _is_claude_opus_4_6(model: str) -> bool: """Check if the model is Claude Opus 4.5.""" - return "opus-4-5" in model.lower() or "opus_4_5" in model.lower() + return "opus-4-6" in model.lower() or "opus_4_6" in model.lower() def get_supported_openai_params(self, model: str): params = [ @@ -659,32 +660,38 @@ def _map_stop_sequences( @staticmethod def _map_reasoning_effort( - reasoning_effort: Optional[Union[REASONING_EFFORT, str]], + reasoning_effort: Optional[Union[REASONING_EFFORT, str]], + model: str, ) -> Optional[AnthropicThinkingParam]: - if reasoning_effort is None: - return None - elif reasoning_effort == "low": - return AnthropicThinkingParam( - type="enabled", - budget_tokens=DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET, - ) - elif reasoning_effort == "medium": - return AnthropicThinkingParam( - 
type="enabled", - budget_tokens=DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET, - ) - elif reasoning_effort == "high": - return AnthropicThinkingParam( - type="enabled", - budget_tokens=DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET, - ) - elif reasoning_effort == "minimal": + if AnthropicConfig._is_claude_opus_4_6(model): return AnthropicThinkingParam( - type="enabled", - budget_tokens=DEFAULT_REASONING_EFFORT_MINIMAL_THINKING_BUDGET, + type="adaptive", ) else: - raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}") + if reasoning_effort is None: + return None + elif reasoning_effort == "low": + return AnthropicThinkingParam( + type="enabled", + budget_tokens=DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET, + ) + elif reasoning_effort == "medium": + return AnthropicThinkingParam( + type="enabled", + budget_tokens=DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET, + ) + elif reasoning_effort == "high": + return AnthropicThinkingParam( + type="enabled", + budget_tokens=DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET, + ) + elif reasoning_effort == "minimal": + return AnthropicThinkingParam( + type="enabled", + budget_tokens=DEFAULT_REASONING_EFFORT_MINIMAL_THINKING_BUDGET, + ) + else: + raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}") def _extract_json_schema_from_response_format( self, value: Optional[dict] @@ -860,13 +867,8 @@ def map_openai_params( # noqa: PLR0915 if param == "thinking": optional_params["thinking"] = value elif param == "reasoning_effort" and isinstance(value, str): - # For Claude Opus 4.5, map reasoning_effort to output_config - if self._is_claude_opus_4_5(model): - optional_params["output_config"] = {"effort": value} - - # For other models, map to thinking parameter optional_params["thinking"] = AnthropicConfig._map_reasoning_effort( - value + reasoning_effort=value, model=model ) elif param == "web_search_options" and isinstance(value, dict): hosted_web_search_tool = self.map_web_search_tool( @@ -877,6 +879,9 @@ def 
map_openai_params( # noqa: PLR0915 ) elif param == "extra_headers": optional_params["extra_headers"] = value + elif param == "context_management" and isinstance(value, dict): + # Pass through Anthropic-specific context_management parameter + optional_params["context_management"] = value ## handle thinking tokens self.update_optional_params_with_thinking_tokens( @@ -1026,9 +1031,37 @@ def _ensure_beta_header(self, headers: dict, beta_value: str) -> None: if beta_value not in existing_values: headers["anthropic-beta"] = f"{existing_beta}, {beta_value}" - def _ensure_context_management_beta_header(self, headers: dict) -> None: - beta_value = ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value - self._ensure_beta_header(headers, beta_value) + def _ensure_context_management_beta_header( + self, headers: dict, context_management: dict + ) -> None: + """ + Add appropriate beta headers based on context_management edits. + - If any edit has type "compact_20260112", add compact-2026-01-12 header + - For all other edits, add context-management-2025-06-27 header + """ + edits = context_management.get("edits", []) + + has_compact = False + has_other = False + + for edit in edits: + edit_type = edit.get("type", "") + if edit_type == "compact_20260112": + has_compact = True + else: + has_other = True + + # Add compact header if any compact edits exist + if has_compact: + self._ensure_beta_header( + headers, ANTHROPIC_BETA_HEADER_VALUES.COMPACT_2026_01_12.value + ) + + # Add context management header if any other edits exist + if has_other: + self._ensure_beta_header( + headers, ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value + ) def update_headers_with_optional_anthropic_beta( self, headers: dict, optional_params: dict @@ -1056,7 +1089,9 @@ def update_headers_with_optional_anthropic_beta( headers, ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value ) if optional_params.get("context_management") is not None: - 
self._ensure_context_management_beta_header(headers) + self._ensure_context_management_beta_header( + headers, optional_params["context_management"] + ) if optional_params.get("output_format") is not None: self._ensure_beta_header( headers, ANTHROPIC_BETA_HEADER_VALUES.STRUCTURED_OUTPUT_2025_09_25.value @@ -1225,6 +1260,7 @@ def extract_response_content(self, completion_response: dict) -> Tuple[ List[ChatCompletionToolCallChunk], Optional[List[Any]], Optional[List[Any]], + Optional[List[Any]], ]: text_content = "" citations: Optional[List[Any]] = None @@ -1237,6 +1273,7 @@ def extract_response_content(self, completion_response: dict) -> Tuple[ tool_calls: List[ChatCompletionToolCallChunk] = [] web_search_results: Optional[List[Any]] = None tool_results: Optional[List[Any]] = None + compaction_blocks: Optional[List[Any]] = None for idx, content in enumerate(completion_response["content"]): if content["type"] == "text": text_content += content["text"] @@ -1278,6 +1315,12 @@ def extract_response_content(self, completion_response: dict) -> Tuple[ thinking_blocks.append( cast(ChatCompletionRedactedThinkingBlock, content) ) + + ## COMPACTION + elif content["type"] == "compaction": + if compaction_blocks is None: + compaction_blocks = [] + compaction_blocks.append(content) ## CITATIONS if content.get("citations") is not None: @@ -1299,7 +1342,7 @@ def extract_response_content(self, completion_response: dict) -> Tuple[ if thinking_content is not None: reasoning_content += thinking_content - return text_content, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results + return text_content, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results, compaction_blocks def calculate_usage( self, @@ -1316,6 +1359,10 @@ def calculate_usage( cache_creation_token_details: Optional[CacheCreationTokenDetails] = None web_search_requests: Optional[int] = None tool_search_requests: Optional[int] = None + 
inference_geo: Optional[str] = None + if "inference_geo" in _usage and _usage["inference_geo"] is not None: + inference_geo = _usage["inference_geo"] + if ( "cache_creation_input_tokens" in _usage and _usage["cache_creation_input_tokens"] is not None @@ -1399,6 +1446,7 @@ def calculate_usage( if (web_search_requests is not None or tool_search_requests is not None) else None ), + inference_geo=inference_geo, ) return usage @@ -1442,6 +1490,7 @@ def transform_parsed_response( tool_calls, web_search_results, tool_results, + compaction_blocks, ) = self.extract_response_content(completion_response=completion_response) if ( @@ -1469,6 +1518,8 @@ def transform_parsed_response( provider_specific_fields["tool_results"] = tool_results if container is not None: provider_specific_fields["container"] = container + if compaction_blocks is not None: + provider_specific_fields["compaction_blocks"] = compaction_blocks _message = litellm.Message( tool_calls=tool_calls, @@ -1477,6 +1528,7 @@ def transform_parsed_response( thinking_blocks=thinking_blocks, reasoning_content=reasoning_content, ) + _message.provider_specific_fields = provider_specific_fields ## HANDLE JSON MODE - anthropic returns single function call json_mode_message = self._transform_response_for_json_mode( @@ -1507,18 +1559,7 @@ def transform_parsed_response( model_response.created = int(time.time()) model_response.model = completion_response["model"] - context_management_response = completion_response.get("context_management") - if context_management_response is not None: - _hidden_params["context_management"] = context_management_response - try: - model_response.__dict__["context_management"] = ( - context_management_response - ) - except Exception: - pass - model_response._hidden_params = _hidden_params - return model_response def get_prefix_prompt(self, messages: List[AllMessageValues]) -> Optional[str]: diff --git a/litellm/llms/anthropic/cost_calculation.py b/litellm/llms/anthropic/cost_calculation.py index 
8f34eb00ce5..11b61cc92f0 100644 --- a/litellm/llms/anthropic/cost_calculation.py +++ b/litellm/llms/anthropic/cost_calculation.py @@ -22,10 +22,17 @@ def cost_per_token(model: str, usage: "Usage") -> Tuple[float, float]: Returns: Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd """ - return generic_cost_per_token( - model=model, usage=usage, custom_llm_provider="anthropic" + # If usage has inference_geo, prepend it as prefix to model name + if hasattr(usage, "inference_geo") and usage.inference_geo and usage.inference_geo.lower() not in ["global", "not_available"]: + model_with_geo_prefix = f"{usage.inference_geo}/{model}" + else: + model_with_geo_prefix = model + prompt_cost, completion_cost = generic_cost_per_token( + model=model_with_geo_prefix, usage=usage, custom_llm_provider="anthropic" ) + return prompt_cost, completion_cost + def get_cost_for_anthropic_web_search( model_info: Optional["ModelInfo"] = None, diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py b/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py index 308bf367d06..bb40f9df266 100644 --- a/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py +++ b/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py @@ -2,6 +2,9 @@ import httpx +from litellm.anthropic_beta_headers_manager import ( + update_headers_with_filtered_beta, +) from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.litellm_core_utils.litellm_logging import verbose_logger from litellm.llms.base_llm.anthropic_messages.transformation import ( @@ -90,6 +93,11 @@ def validate_anthropic_messages_environment( optional_params=optional_params, ) + headers = update_headers_with_filtered_beta( + headers=headers, + provider="anthropic", + ) + return headers, api_base def transform_anthropic_messages_request( @@ -189,8 +197,27 @@ def _update_headers_with_anthropic_beta( 
beta_values.update(b.strip() for b in existing_beta.split(",")) # Check for context management - if optional_params.get("context_management") is not None: - beta_values.add(ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value) + context_management_param = optional_params.get("context_management") + if context_management_param is not None: + # Check edits array for compact_20260112 type + edits = context_management_param.get("edits", []) + has_compact = False + has_other = False + + for edit in edits: + edit_type = edit.get("type", "") + if edit_type == "compact_20260112": + has_compact = True + else: + has_other = True + + # Add compact header if any compact edits exist + if has_compact: + beta_values.add(ANTHROPIC_BETA_HEADER_VALUES.COMPACT_2026_01_12.value) + + # Add context management header if any other edits exist + if has_other: + beta_values.add(ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value) # Check for structured outputs if optional_params.get("output_format") is not None: diff --git a/litellm/llms/azure_ai/anthropic/transformation.py b/litellm/llms/azure_ai/anthropic/transformation.py index 2d8d3b987c7..753bc9c08eb 100644 --- a/litellm/llms/azure_ai/anthropic/transformation.py +++ b/litellm/llms/azure_ai/anthropic/transformation.py @@ -3,6 +3,9 @@ """ from typing import TYPE_CHECKING, Dict, List, Optional, Union +from litellm.anthropic_beta_headers_manager import ( + update_headers_with_filtered_beta, +) from litellm.llms.anthropic.chat.transformation import AnthropicConfig from litellm.llms.azure.common_utils import BaseAzureLLM from litellm.types.llms.openai import AllMessageValues @@ -87,6 +90,12 @@ def validate_environment( if "anthropic-version" not in headers: headers["anthropic-version"] = "2023-06-01" + # Filter out unsupported beta headers for Azure AI + headers = update_headers_with_filtered_beta( + headers=headers, + provider="azure_ai", + ) + return headers def transform_request( diff --git 
a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py index 6591e152a14..7fc51263ebb 100644 --- a/litellm/llms/bedrock/chat/converse_transformation.py +++ b/litellm/llms/bedrock/chat/converse_transformation.py @@ -11,6 +11,9 @@ import litellm from litellm._logging import verbose_logger +from litellm.anthropic_beta_headers_manager import ( + filter_and_transform_beta_headers, +) from litellm.constants import RESPONSE_FORMAT_TOOL_NAME from litellm.litellm_core_utils.core_helpers import ( filter_exceptions_from_params, @@ -30,8 +33,6 @@ from litellm.llms.anthropic.chat.transformation import AnthropicConfig from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException from litellm.types.llms.bedrock import * - -from ..common_utils import is_claude_4_5_on_bedrock from litellm.types.llms.openai import ( AllMessageValues, ChatCompletionAssistantMessage, @@ -68,6 +69,7 @@ BedrockModelInfo, get_anthropic_beta_from_headers, get_bedrock_tool_name, + is_claude_4_5_on_bedrock, ) # Computer use tool prefixes supported by Bedrock @@ -83,6 +85,7 @@ UNSUPPORTED_BEDROCK_CONVERSE_BETA_PATTERNS = [ "advanced-tool-use", # Bedrock Converse doesn't support advanced-tool-use beta headers "prompt-caching", # Prompt caching not supported in Converse API + "compact-2026-01-12", # The compact beta feature is not currently supported on the Converse and ConverseStream APIs ] @@ -431,7 +434,7 @@ def _handle_reasoning_effort_parameter( else: # Anthropic and other models: convert to thinking parameter optional_params["thinking"] = AnthropicConfig._map_reasoning_effort( - reasoning_effort + reasoning_effort=reasoning_effort, model=model ) def get_supported_openai_params(self, model: str) -> List[str]: @@ -617,37 +620,6 @@ def _transform_computer_use_tools( return transformed_tools - def _filter_unsupported_beta_headers_for_bedrock( - self, model: str, beta_list: list - ) -> list: - """ - Remove beta headers that are not 
supported on Bedrock Converse API for the given model. - - Extended thinking beta headers are only supported on specific Claude 4+ models. - Some beta headers are universally unsupported on Bedrock Converse API. - - Args: - model: The model name - beta_list: The list of beta headers to filter - - Returns: - Filtered list of beta headers - """ - filtered_betas = [] - - # 1. Filter out beta headers that are universally unsupported on Bedrock Converse - for beta in beta_list: - should_keep = True - for unsupported_pattern in UNSUPPORTED_BEDROCK_CONVERSE_BETA_PATTERNS: - if unsupported_pattern in beta.lower(): - should_keep = False - break - - if should_keep: - filtered_betas.append(beta) - - return filtered_betas - def _separate_computer_use_tools( self, tools: List[OpenAIChatCompletionToolParam], model: str ) -> Tuple[ @@ -1124,7 +1096,28 @@ def _process_tools_and_beta( # Add computer use tools and anthropic_beta if needed (only when computer use tools are present) if computer_use_tools: - anthropic_beta_list.append("computer-use-2024-10-22") + # Determine the correct computer-use beta header based on model + # "computer-use-2025-11-24" for Claude Opus 4.6, Claude Opus 4.5 + # "computer-use-2025-01-24" for Claude Sonnet 4.5, Haiku 4.5, Opus 4.1, Sonnet 4, Opus 4, and Sonnet 3.7 + # "computer-use-2024-10-22" for older models + model_lower = model.lower() + if "opus-4.6" in model_lower or "opus_4.6" in model_lower or "opus-4-6" in model_lower or "opus_4_6" in model_lower: + computer_use_header = "computer-use-2025-11-24" + elif "opus-4.5" in model_lower or "opus_4.5" in model_lower or "opus-4-5" in model_lower or "opus_4_5" in model_lower: + computer_use_header = "computer-use-2025-11-24" + elif any(pattern in model_lower for pattern in [ + "sonnet-4.5", "sonnet_4.5", "sonnet-4-5", "sonnet_4_5", + "haiku-4.5", "haiku_4.5", "haiku-4-5", "haiku_4_5", + "opus-4.1", "opus_4.1", "opus-4-1", "opus_4_1", + "sonnet-4", "sonnet_4", + "opus-4", "opus_4", + "sonnet-3.7", 
"sonnet_3.7", "sonnet-3-7", "sonnet_3_7" + ]): + computer_use_header = "computer-use-2025-01-24" + else: + computer_use_header = "computer-use-2024-10-22" + + anthropic_beta_list.append(computer_use_header) # Transform computer use tools to proper Bedrock format transformed_computer_tools = self._transform_computer_use_tools( computer_use_tools @@ -1150,13 +1143,13 @@ def _process_tools_and_beta( unique_betas.append(beta) seen.add(beta) - # Filter out unsupported beta headers for Bedrock Converse API - filtered_betas = self._filter_unsupported_beta_headers_for_bedrock( - model=model, - beta_list=unique_betas, + filtered_betas = filter_and_transform_beta_headers( + beta_headers=unique_betas, + provider="bedrock_converse", ) - - additional_request_params["anthropic_beta"] = filtered_betas + + if filtered_betas: + additional_request_params["anthropic_beta"] = filtered_betas return bedrock_tools, anthropic_beta_list diff --git a/litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py b/litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py index 90de67a822f..19fe7d8c140 100644 --- a/litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py +++ b/litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py @@ -12,6 +12,9 @@ import httpx +from litellm.anthropic_beta_headers_manager import ( + filter_and_transform_beta_headers, +) from litellm.llms.anthropic.common_utils import AnthropicModelInfo from litellm.llms.anthropic.experimental_pass_through.messages.transformation import ( AnthropicMessagesConfig, @@ -55,10 +58,6 @@ class AmazonAnthropicClaudeMessagesConfig( # Beta header patterns that are not supported by Bedrock Invoke API # These will be filtered out to prevent 400 "invalid beta flag" errors - UNSUPPORTED_BEDROCK_INVOKE_BETA_PATTERNS = [ - "advanced-tool-use", # Bedrock Invoke doesn't support advanced-tool-use beta headers - 
"prompt-caching-scope", - ] def __init__(self, **kwargs): BaseAnthropicMessagesConfig.__init__(self, **kwargs) @@ -276,39 +275,48 @@ def _filter_unsupported_beta_headers_for_bedrock( model: The model name beta_set: The set of beta headers to filter in-place """ - beta_headers_to_remove = set() - has_advanced_tool_use = False - - # 1. Filter out beta headers that are universally unsupported on Bedrock Invoke and track if advanced-tool-use header is present - for beta in beta_set: - for unsupported_pattern in self.UNSUPPORTED_BEDROCK_INVOKE_BETA_PATTERNS: - if unsupported_pattern in beta.lower(): - beta_headers_to_remove.add(beta) - has_advanced_tool_use = True - break - - # 2. Filter out extended thinking headers for models that don't support them + # 1. Handle header transformations BEFORE filtering + # (advanced-tool-use -> tool-search-tool) + # This must happen before filtering because advanced-tool-use is in the unsupported list + has_advanced_tool_use = "advanced-tool-use-2025-11-20" in beta_set + if has_advanced_tool_use and self._supports_tool_search_on_bedrock(model): + beta_set.discard("advanced-tool-use-2025-11-20") + beta_set.add("tool-search-tool-2025-10-19") + beta_set.add("tool-examples-2025-10-29") + + # 2. Apply provider-level filtering using centralized JSON config + beta_list = list(beta_set) + filtered_list = filter_and_transform_beta_headers( + beta_headers=beta_list, + provider="bedrock", + ) + + # Update the set with filtered headers + beta_set.clear() + beta_set.update(filtered_list) + + # 2.1. Handle model-specific exceptions: structured-outputs is only supported on Opus 4.6 + # Re-add structured-outputs if it was in the original set and model is Opus 4.6 + model_lower = model.lower() + is_opus_4_6 = any(pattern in model_lower for pattern in ["opus-4.6", "opus_4.6", "opus-4-6", "opus_4_6"]) + if is_opus_4_6 and "structured-outputs-2025-11-13" in beta_list: + beta_set.add("structured-outputs-2025-11-13") + + # 3. 
Filter out extended thinking headers for models that don't support them extended_thinking_patterns = [ "extended-thinking", "interleaved-thinking", ] if not self._supports_extended_thinking_on_bedrock(model): + beta_headers_to_remove = set() for beta in beta_set: for pattern in extended_thinking_patterns: if pattern in beta.lower(): beta_headers_to_remove.add(beta) break - - # Remove all filtered headers - for beta in beta_headers_to_remove: - beta_set.discard(beta) - - # 3. Translate advanced-tool-use to Bedrock-specific headers for models that support tool search - # Ref: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages-request-response.html - # Ref: https://platform.claude.com/docs/en/agents-and-tools/tool-use/tool-search-tool - if has_advanced_tool_use and self._supports_tool_search_on_bedrock(model): - beta_set.add("tool-search-tool-2025-10-19") - beta_set.add("tool-examples-2025-10-29") + + for beta in beta_headers_to_remove: + beta_set.discard(beta) def _get_tool_search_beta_header_for_bedrock( self, diff --git a/litellm/llms/databricks/chat/transformation.py b/litellm/llms/databricks/chat/transformation.py index 2b7f5dd5995..e9ae94307d4 100644 --- a/litellm/llms/databricks/chat/transformation.py +++ b/litellm/llms/databricks/chat/transformation.py @@ -298,7 +298,8 @@ def map_openai_params( if "reasoning_effort" in non_default_params and "claude" in model: optional_params["thinking"] = AnthropicConfig._map_reasoning_effort( - non_default_params.get("reasoning_effort") + reasoning_effort=non_default_params.get("reasoning_effort"), + model=model ) optional_params.pop("reasoning_effort", None) ## handle thinking tokens diff --git a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py index 9b8ff3ecc2d..918b8ecc225 100644 --- 
a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py +++ b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py @@ -1,5 +1,8 @@ from typing import Any, Dict, List, Optional, Tuple +from litellm.anthropic_beta_headers_manager import ( + update_headers_with_filtered_beta, +) from litellm.llms.anthropic.common_utils import AnthropicModelInfo from litellm.llms.anthropic.experimental_pass_through.messages.transformation import ( AnthropicMessagesConfig, @@ -7,7 +10,6 @@ from litellm.types.llms.anthropic import ( ANTHROPIC_BETA_HEADER_VALUES, ANTHROPIC_HOSTED_TOOLS, - ANTHROPIC_PROMPT_CACHING_SCOPE_BETA_HEADER, ) from litellm.types.llms.anthropic_tool_search import get_tool_search_beta_header from litellm.types.llms.vertex_ai import VertexPartnerProvider @@ -65,10 +67,6 @@ def validate_anthropic_messages_environment( existing_beta = headers.get("anthropic-beta") if existing_beta: beta_values.update(b.strip() for b in existing_beta.split(",")) - - # Use the helper to remove unsupported beta headers - self.remove_unsupported_beta(headers) - beta_values.discard(ANTHROPIC_PROMPT_CACHING_SCOPE_BETA_HEADER) # Check for web search tool for tool in tools: @@ -84,6 +82,12 @@ def validate_anthropic_messages_environment( if beta_values: headers["anthropic-beta"] = ",".join(beta_values) + # Filter out unsupported beta headers for Vertex AI + headers = update_headers_with_filtered_beta( + headers=headers, + provider="vertex_ai", + ) + return headers, api_base def get_complete_url( @@ -128,23 +132,3 @@ def transform_anthropic_messages_request( ) # do not pass output_format in request body to vertex ai - vertex ai does not support output_format as yet return anthropic_messages_request - - def remove_unsupported_beta(self, headers: dict) -> None: - """ - Helper method to remove unsupported beta headers from the beta headers. - Modifies headers in place. 
- """ - unsupported_beta_headers = [ - ANTHROPIC_PROMPT_CACHING_SCOPE_BETA_HEADER - ] - existing_beta = headers.get("anthropic-beta") - if existing_beta: - filtered_beta = [ - b.strip() - for b in existing_beta.split(",") - if b.strip() not in unsupported_beta_headers - ] - if filtered_beta: - headers["anthropic-beta"] = ",".join(filtered_beta) - elif "anthropic-beta" in headers: - del headers["anthropic-beta"] diff --git a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py index 1df07f405e6..0b728d88e76 100644 --- a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py +++ b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py @@ -51,6 +51,40 @@ class VertexAIAnthropicConfig(AnthropicConfig): def custom_llm_provider(self) -> Optional[str]: return "vertex_ai" + def _add_context_management_beta_headers( + self, beta_set: set, context_management: dict + ) -> None: + """ + Add context_management beta headers to the beta_set. 
+ + - If any edit has type "compact_20260112", add compact-2026-01-12 header + - For all other edits, add context-management-2025-06-27 header + + Args: + beta_set: Set of beta headers to modify in-place + context_management: The context_management dict from optional_params + """ + from litellm.types.llms.anthropic import ANTHROPIC_BETA_HEADER_VALUES + + edits = context_management.get("edits", []) + has_compact = False + has_other = False + + for edit in edits: + edit_type = edit.get("type", "") + if edit_type == "compact_20260112": + has_compact = True + else: + has_other = True + + # Add compact header if any compact edits exist + if has_compact: + beta_set.add(ANTHROPIC_BETA_HEADER_VALUES.COMPACT_2026_01_12.value) + + # Add context management header if any other edits exist + if has_other: + beta_set.add(ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value) + def transform_request( self, model: str, @@ -86,6 +120,11 @@ def transform_request( beta_set = set(auto_betas) if tool_search_used: beta_set.add("tool-search-tool-2025-10-19") # Vertex requires this header for tool search + + # Add context_management beta headers (compact and/or context-management) + context_management = optional_params.get("context_management") + if context_management: + self._add_context_management_beta_headers(beta_set, context_management) if beta_set: data["anthropic_beta"] = list(beta_set) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 04ad5ed0c1c..0da47634a94 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -963,7 +963,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, - "anthropic.claude-opus-4-6-v1:0": { + "anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.25e-06, "cache_creation_input_token_cost_above_200k_tokens": 1.25e-05, "cache_read_input_token_cost": 5e-07, @@ -1023,7 
+1023,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346 }, - "global.anthropic.claude-opus-4-6-v1:0": { + "global.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.25e-06, "cache_creation_input_token_cost_above_200k_tokens": 1.25e-05, "cache_read_input_token_cost": 5e-07, @@ -1143,7 +1143,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346 }, - "eu.anthropic.claude-opus-4-6-v1:0": { + "eu.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.875e-06, "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05, "cache_read_input_token_cost": 5.5e-07, @@ -1203,7 +1203,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346 }, - "apac.anthropic.claude-opus-4-6-v1:0": { + "apac.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.875e-06, "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05, "cache_read_input_token_cost": 5.5e-07, @@ -3494,29 +3494,6 @@ "supports_tool_choice": true, "supports_vision": true }, - "azure/gpt-5-search-api": { - "cache_read_input_token_cost": 1.25e-07, - "input_cost_per_token": 1.25e-06, - "litellm_provider": "azure", - "max_input_tokens": 128000, - "max_output_tokens": 4096, - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 1e-05, - "search_context_cost_per_query": { - "search_context_size_high": 0.05, - "search_context_size_low": 0.03, - "search_context_size_medium": 0.035 - }, - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_prompt_caching": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true, - "supports_web_search": true - }, "azure/gpt-5-2025-08-07": { "cache_read_input_token_cost": 1.25e-07, "input_cost_per_token": 1.25e-06, @@ -7836,6 +7813,37 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346 }, + "us/claude-opus-4-6": { + "cache_creation_input_token_cost": 
6.875e-06, + "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05, + "cache_creation_input_token_cost_above_1hr": 1.1e-05, + "cache_read_input_token_cost": 5.5e-07, + "cache_read_input_token_cost_above_200k_tokens": 1.1e-06, + "input_cost_per_token": 5.5e-06, + "input_cost_per_token_above_200k_tokens": 1.1e-05, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.75e-05, + "output_cost_per_token_above_200k_tokens": 4.125e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "claude-opus-4-6-20260205": { "cache_creation_input_token_cost": 6.25e-06, "cache_creation_input_token_cost_above_200k_tokens": 1.25e-05, @@ -7867,6 +7875,37 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346 }, + "us/claude-opus-4-6-20260205": { + "cache_creation_input_token_cost": 6.875e-06, + "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05, + "cache_creation_input_token_cost_above_1hr": 1.1e-05, + "cache_read_input_token_cost": 5.5e-07, + "cache_read_input_token_cost_above_200k_tokens": 1.1e-06, + "input_cost_per_token": 5.5e-06, + "input_cost_per_token_above_200k_tokens": 1.1e-05, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.75e-05, + "output_cost_per_token_above_200k_tokens": 4.125e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + 
"search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "claude-sonnet-4-20250514": { "deprecation_date": "2026-05-14", "cache_creation_input_token_cost": 3.75e-06, @@ -18758,29 +18797,6 @@ "supports_service_tier": true, "supports_vision": true }, - "gpt-5-search-api": { - "cache_read_input_token_cost": 1.25e-07, - "input_cost_per_token": 1.25e-06, - "litellm_provider": "openai", - "max_input_tokens": 128000, - "max_output_tokens": 4096, - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 1e-05, - "search_context_cost_per_query": { - "search_context_size_high": 0.05, - "search_context_size_low": 0.03, - "search_context_size_medium": 0.035 - }, - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_prompt_caching": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true, - "supports_web_search": true - }, "gpt-5.1": { "cache_read_input_token_cost": 1.25e-07, "cache_read_input_token_cost_priority": 2.5e-07, diff --git a/litellm/types/llms/anthropic.py b/litellm/types/llms/anthropic.py index 62e775d4faa..fedf419efd6 100644 --- a/litellm/types/llms/anthropic.py +++ b/litellm/types/llms/anthropic.py @@ -613,7 +613,7 @@ class AnthropicChatCompletionUsageBlock(ChatCompletionUsageBlock, total=False): class AnthropicThinkingParam(TypedDict, total=False): - type: Literal["enabled"] + type: Literal["enabled", "adaptive"] budget_tokens: int @@ -633,6 +633,7 @@ class ANTHROPIC_BETA_HEADER_VALUES(str, Enum): WEB_FETCH_2025_09_10 = "web-fetch-2025-09-10" WEB_SEARCH_2025_03_05 = 
"web-search-2025-03-05" CONTEXT_MANAGEMENT_2025_06_27 = "context-management-2025-06-27" + COMPACT_2026_01_12 = "compact-2026-01-12" STRUCTURED_OUTPUT_2025_09_25 = "structured-outputs-2025-11-13" ADVANCED_TOOL_USE_2025_11_20 = "advanced-tool-use-2025-11-20" diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index a3e1edb4ae5..0da47634a94 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -963,7 +963,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, - "anthropic.claude-opus-4-6-v1:0": { + "anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.25e-06, "cache_creation_input_token_cost_above_200k_tokens": 1.25e-05, "cache_read_input_token_cost": 5e-07, @@ -1023,7 +1023,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346 }, - "global.anthropic.claude-opus-4-6-v1:0": { + "global.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.25e-06, "cache_creation_input_token_cost_above_200k_tokens": 1.25e-05, "cache_read_input_token_cost": 5e-07, @@ -1143,7 +1143,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346 }, - "eu.anthropic.claude-opus-4-6-v1:0": { + "eu.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.875e-06, "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05, "cache_read_input_token_cost": 5.5e-07, @@ -1203,7 +1203,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346 }, - "apac.anthropic.claude-opus-4-6-v1:0": { + "apac.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.875e-06, "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05, "cache_read_input_token_cost": 5.5e-07, @@ -7813,6 +7813,37 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346 }, + "us/claude-opus-4-6": { + "cache_creation_input_token_cost": 6.875e-06, + "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05, + 
"cache_creation_input_token_cost_above_1hr": 1.1e-05, + "cache_read_input_token_cost": 5.5e-07, + "cache_read_input_token_cost_above_200k_tokens": 1.1e-06, + "input_cost_per_token": 5.5e-06, + "input_cost_per_token_above_200k_tokens": 1.1e-05, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.75e-05, + "output_cost_per_token_above_200k_tokens": 4.125e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "claude-opus-4-6-20260205": { "cache_creation_input_token_cost": 6.25e-06, "cache_creation_input_token_cost_above_200k_tokens": 1.25e-05, @@ -7844,6 +7875,37 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346 }, + "us/claude-opus-4-6-20260205": { + "cache_creation_input_token_cost": 6.875e-06, + "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05, + "cache_creation_input_token_cost_above_1hr": 1.1e-05, + "cache_read_input_token_cost": 5.5e-07, + "cache_read_input_token_cost_above_200k_tokens": 1.1e-06, + "input_cost_per_token": 5.5e-06, + "input_cost_per_token_above_200k_tokens": 1.1e-05, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.75e-05, + "output_cost_per_token_above_200k_tokens": 4.125e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + 
"supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "claude-sonnet-4-20250514": { "deprecation_date": "2026-05-14", "cache_creation_input_token_cost": 3.75e-06, diff --git a/tests/test_litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py b/tests/test_litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py index eee0b267fad..49db7367c67 100644 --- a/tests/test_litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py +++ b/tests/test_litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py @@ -185,7 +185,7 @@ def test_extract_response_content_with_citations(): }, } - _, citations, _, _, _, _ , _= config.extract_response_content(completion_response) + _, citations, _, _, _, _, _, _ = config.extract_response_content(completion_response) assert citations == [ [ { @@ -342,7 +342,7 @@ def test_web_search_tool_result_extraction(): } } - text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results = config.extract_response_content( + text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results, compaction_blocks = config.extract_response_content( completion_response ) @@ -474,7 +474,7 @@ def test_multiple_web_search_tool_results(): ] } - text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results = config.extract_response_content( + text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results, compaction_blocks = config.extract_response_content( completion_response ) @@ -817,59 +817,6 @@ def test_anthropic_chat_transform_request_includes_context_management(): assert result["context_management"] == 
_sample_context_management_payload() -def test_transform_parsed_response_includes_context_management_metadata(): - import httpx - - from litellm.types.utils import ModelResponse - - config = AnthropicConfig() - context_management_payload = { - "applied_edits": [ - { - "type": "clear_tool_uses_20250919", - "cleared_tool_uses": 2, - "cleared_input_tokens": 5000, - } - ] - } - completion_response = { - "id": "msg_context_management_test", - "type": "message", - "role": "assistant", - "model": "claude-sonnet-4-20250514", - "content": [{"type": "text", "text": "Done."}], - "stop_reason": "end_turn", - "stop_sequence": None, - "usage": { - "input_tokens": 10, - "cache_creation_input_tokens": 0, - "cache_read_input_tokens": 0, - "output_tokens": 5, - }, - "context_management": context_management_payload, - } - raw_response = httpx.Response( - status_code=200, - headers={}, - ) - model_response = ModelResponse() - - result = config.transform_parsed_response( - completion_response=completion_response, - raw_response=raw_response, - model_response=model_response, - json_mode=False, - prefix_prompt=None, - ) - - assert result.__dict__.get("context_management") == context_management_payload - provider_fields = result.choices[0].message.provider_specific_fields - assert ( - provider_fields - and provider_fields["context_management"] == context_management_payload - ) - - def test_anthropic_structured_output_beta_header(): from litellm.types.utils import CallTypes from litellm.utils import return_raw_request @@ -1043,7 +990,7 @@ def test_server_tool_use_in_response(): ] } - text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results = config.extract_response_content( + text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results, compaction_blocks = config.extract_response_content( completion_response ) @@ -1171,7 +1118,7 @@ def test_tool_search_complete_response_parsing(): } # Extract content - text, 
citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results = config.extract_response_content( + text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results, compaction_blocks = config.extract_response_content( completion_response ) @@ -1291,7 +1238,7 @@ def test_caller_field_in_response(): "usage": {"input_tokens": 100, "output_tokens": 50} } - text, citations, thinking, reasoning, tool_calls, web_search_results, tool_results = config.extract_response_content(completion_response) + text, citations, thinking, reasoning, tool_calls, web_search_results, tool_results, compaction_blocks = config.extract_response_content(completion_response) assert len(tool_calls) == 1 assert tool_calls[0]["id"] == "toolu_123" @@ -1934,6 +1881,75 @@ def test_calculate_usage_completion_tokens_details_with_reasoning(): assert usage.completion_tokens == 500 +# ============ Reasoning Effort Tests ============ + + +def test_reasoning_effort_maps_to_adaptive_thinking_for_opus_4_6(): + """ + Test that reasoning_effort maps to adaptive thinking type for Claude Opus 4.6. + + For Claude Opus 4.6, reasoning_effort should map to {"type": "adaptive"} + regardless of the effort level specified. 
+ """ + config = AnthropicConfig() + + # Test with different reasoning_effort values - all should map to adaptive + for effort in ["low", "medium", "high", "minimal"]: + non_default_params = {"reasoning_effort": effort} + optional_params = {} + + result = config.map_openai_params( + non_default_params=non_default_params, + optional_params=optional_params, + model="claude-opus-4-6-20250514", + drop_params=False + ) + + # Should map to adaptive thinking type + assert "thinking" in result + assert result["thinking"]["type"] == "adaptive" + # Should not have budget_tokens for adaptive type + assert "budget_tokens" not in result["thinking"] + # reasoning_effort should not be in the result (it's transformed to thinking) + assert "reasoning_effort" not in result + + +def test_reasoning_effort_maps_to_budget_thinking_for_non_opus_4_6(): + """ + Test that reasoning_effort maps to budget-based thinking config for non-Opus 4.6 models. + + For models other than Claude Opus 4.6, reasoning_effort should map to + thinking config with budget_tokens based on the effort level. 
+ """ + config = AnthropicConfig() + + # Test with Claude Sonnet 4.5 (non-Opus 4.6 model) + test_cases = [ + ("low", 1024), # DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET + ("medium", 2048), # DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET + ("high", 4096), # DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET + ("minimal", 128), # DEFAULT_REASONING_EFFORT_MINIMAL_THINKING_BUDGET + ] + + for effort, expected_budget in test_cases: + non_default_params = {"reasoning_effort": effort} + optional_params = {} + + result = config.map_openai_params( + non_default_params=non_default_params, + optional_params=optional_params, + model="claude-sonnet-4-5-20250929", + drop_params=False + ) + + # Should map to enabled thinking type with budget_tokens + assert "thinking" in result + assert result["thinking"]["type"] == "enabled" + assert result["thinking"]["budget_tokens"] == expected_budget + # reasoning_effort should not be in the result (it's transformed to thinking) + assert "reasoning_effort" not in result + + def test_code_execution_tool_results_extraction(): """ Test that code execution tool results (bash_code_execution_tool_result, @@ -2174,3 +2190,319 @@ def test_web_search_tool_result_backwards_compatibility(): # Should NOT be in tool_results assert provider_fields.get("tool_results") is None + + +# ============ Compaction Tests ============ + + +def test_compaction_block_extraction(): + """ + Test that compaction blocks are correctly extracted from Anthropic response. + """ + config = AnthropicConfig() + + completion_response = { + "id": "msg_compaction_test", + "type": "message", + "role": "assistant", + "model": "claude-opus-4-6", + "content": [ + { + "type": "compaction", + "content": "Summary of the conversation: The user requested help building a web scraper..." + }, + { + "type": "text", + "text": "I don't have access to real-time data, so I can't provide the current weather in San Francisco." 
+ } + ], + "stop_reason": "max_tokens", + "stop_sequence": None, + "usage": { + "input_tokens": 86, + "output_tokens": 100 + } + } + + text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results, compaction_blocks = config.extract_response_content( + completion_response + ) + + # Verify compaction blocks are extracted + assert compaction_blocks is not None + assert len(compaction_blocks) == 1 + assert compaction_blocks[0]["type"] == "compaction" + assert "Summary of the conversation" in compaction_blocks[0]["content"] + + # Verify text content is extracted + assert "I don't have access to real-time data" in text + + +def test_compaction_block_in_provider_specific_fields(): + """ + Test that compaction blocks are included in provider_specific_fields. + """ + import httpx + + from litellm.types.utils import ModelResponse + + config = AnthropicConfig() + + completion_response = { + "id": "msg_compaction_provider_fields", + "type": "message", + "role": "assistant", + "model": "claude-opus-4-6", + "content": [ + { + "type": "compaction", + "content": "Summary of the conversation: The user requested help building a web scraper..." + }, + { + "type": "text", + "text": "Here is the response." 
+ } + ], + "stop_reason": "end_turn", + "usage": { + "input_tokens": 50, + "output_tokens": 25 + } + } + + raw_response = httpx.Response(status_code=200, headers={}) + model_response = ModelResponse() + + result = config.transform_parsed_response( + completion_response=completion_response, + raw_response=raw_response, + model_response=model_response, + json_mode=False, + prefix_prompt=None, + ) + + # Verify compaction_blocks is in provider_specific_fields + provider_fields = result.choices[0].message.provider_specific_fields + assert provider_fields is not None + assert "compaction_blocks" in provider_fields + assert len(provider_fields["compaction_blocks"]) == 1 + assert provider_fields["compaction_blocks"][0]["type"] == "compaction" + assert "Summary of the conversation" in provider_fields["compaction_blocks"][0]["content"] + + +def test_multiple_compaction_blocks(): + """ + Test that multiple compaction blocks are all extracted. + """ + config = AnthropicConfig() + + completion_response = { + "content": [ + { + "type": "compaction", + "content": "First summary..." + }, + { + "type": "text", + "text": "Some text." + }, + { + "type": "compaction", + "content": "Second summary..." + } + ] + } + + text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results, compaction_blocks = config.extract_response_content( + completion_response + ) + + # Verify both compaction blocks are extracted + assert compaction_blocks is not None + assert len(compaction_blocks) == 2 + assert compaction_blocks[0]["content"] == "First summary..." + assert compaction_blocks[1]["content"] == "Second summary..." + + +def test_compaction_block_request_transformation(): + """ + Test that compaction blocks from provider_specific_fields are correctly + transformed back to Anthropic format in requests. 
+ """ + from litellm.litellm_core_utils.prompt_templates.factory import ( + anthropic_messages_pt, + ) + + messages = [ + { + "role": "user", + "content": "What is the weather in San Francisco?" + }, + { + "role": "assistant", + "content": [ + { + "type": "text", + "text": "I don't have access to real-time data." + } + ], + "provider_specific_fields": { + "compaction_blocks": [ + { + "type": "compaction", + "content": "Summary of the conversation: The user requested help building a web scraper..." + } + ] + } + }, + { + "role": "user", + "content": "What about New York?" + } + ] + + result = anthropic_messages_pt( + messages=messages, + model="claude-opus-4-6", + llm_provider="anthropic" + ) + + # Find the assistant message + assistant_message = None + for msg in result: + if msg["role"] == "assistant": + assistant_message = msg + break + + assert assistant_message is not None + assert "content" in assistant_message + assert isinstance(assistant_message["content"], list) + + # Verify compaction block is at the beginning + assert assistant_message["content"][0]["type"] == "compaction" + assert "Summary of the conversation" in assistant_message["content"][0]["content"] + + # Verify text content follows + text_blocks = [c for c in assistant_message["content"] if c.get("type") == "text"] + assert len(text_blocks) > 0 + assert "I don't have access to real-time data" in text_blocks[0]["text"] + + +def test_compaction_with_context_management(): + """ + Test that compaction works with context_management parameter. 
+ """ + config = AnthropicConfig() + + messages = [{"role": "user", "content": "Hello"}] + optional_params = { + "context_management": { + "edits": [ + { + "type": "compact_20260112" + } + ] + }, + "max_tokens": 100 + } + + result = config.transform_request( + model="claude-opus-4-6", + messages=messages, + optional_params=optional_params, + litellm_params={}, + headers={} + ) + + # Verify context_management is included + assert "context_management" in result + assert result["context_management"]["edits"][0]["type"] == "compact_20260112" + + +def test_compaction_block_with_other_content_types(): + """ + Test that compaction blocks work alongside other content types like thinking blocks and tool calls. + """ + config = AnthropicConfig() + + completion_response = { + "content": [ + { + "type": "compaction", + "content": "Summary of previous conversation..." + }, + { + "type": "thinking", + "thinking": "Let me think about this..." + }, + { + "type": "text", + "text": "Based on my analysis..." + }, + { + "type": "tool_use", + "id": "toolu_123", + "name": "get_weather", + "input": {"location": "San Francisco"} + } + ] + } + + text, citations, thinking_blocks, reasoning_content, tool_calls, web_search_results, tool_results, compaction_blocks = config.extract_response_content( + completion_response + ) + + # Verify all content types are extracted + assert compaction_blocks is not None + assert len(compaction_blocks) == 1 + assert thinking_blocks is not None + assert len(thinking_blocks) == 1 + assert "Based on my analysis" in text + assert len(tool_calls) == 1 + assert tool_calls[0]["function"]["name"] == "get_weather" + + +def test_compaction_block_empty_list_not_added(): + """ + Test that empty compaction_blocks list is not added to provider_specific_fields. 
+ """ + import httpx + + from litellm.types.utils import ModelResponse + + config = AnthropicConfig() + + # Response without compaction blocks + completion_response = { + "id": "msg_no_compaction", + "type": "message", + "role": "assistant", + "model": "claude-opus-4-6", + "content": [ + { + "type": "text", + "text": "Just a regular response." + } + ], + "stop_reason": "end_turn", + "usage": { + "input_tokens": 10, + "output_tokens": 5 + } + } + + raw_response = httpx.Response(status_code=200, headers={}) + model_response = ModelResponse() + + result = config.transform_parsed_response( + completion_response=completion_response, + raw_response=raw_response, + model_response=model_response, + json_mode=False, + prefix_prompt=None, + ) + + # Verify compaction_blocks is not in provider_specific_fields when there are none + provider_fields = result.choices[0].message.provider_specific_fields + if provider_fields: + assert "compaction_blocks" not in provider_fields or provider_fields.get("compaction_blocks") is None diff --git a/tests/test_litellm/llms/azure_ai/claude/test_azure_anthropic_transformation.py b/tests/test_litellm/llms/azure_ai/claude/test_azure_anthropic_transformation.py index e43a899325f..f0f8a9d91bf 100644 --- a/tests/test_litellm/llms/azure_ai/claude/test_azure_anthropic_transformation.py +++ b/tests/test_litellm/llms/azure_ai/claude/test_azure_anthropic_transformation.py @@ -235,3 +235,97 @@ def test_transform_request_removes_unsupported_params(self): assert result["max_tokens"] == 100 assert "messages" in result + def test_context_management_compact_beta_header(self): + """Test that context_management with compact adds the correct beta header for Azure AI""" + config = AzureAnthropicConfig() + + messages = [{"role": "user", "content": "Hello"}] + optional_params = { + "context_management": { + "edits": [ + { + "type": "compact_20260112" + } + ] + }, + "max_tokens": 100 + } + litellm_params = {"api_key": "test-key"} + headers = {"api-key": "test-key"} 
+ + with patch( + "litellm.llms.azure.common_utils.BaseAzureLLM._base_validate_azure_environment" + ) as mock_validate: + mock_validate.return_value = {"api-key": "test-key"} + result = config.transform_request( + model="claude-opus-4-6", + messages=messages, + optional_params=optional_params, + litellm_params=litellm_params, + headers=headers, + ) + + # Verify context_management is included + assert "context_management" in result + assert result["context_management"]["edits"][0]["type"] == "compact_20260112" + + def test_context_management_compact_beta_header_in_headers(self): + """Test that compact beta header is added to headers for Azure AI""" + config = AzureAnthropicConfig() + + messages = [{"role": "user", "content": "Hello"}] + optional_params = { + "context_management": { + "edits": [ + { + "type": "compact_20260112" + } + ] + }, + "max_tokens": 100 + } + + # Test that the parent's update_headers_with_optional_anthropic_beta is called + # which should add the compact beta header + headers = {} + headers = config.update_headers_with_optional_anthropic_beta( + headers=headers, + optional_params=optional_params + ) + + # Verify compact beta header is present + assert "anthropic-beta" in headers + assert "compact-2026-01-12" in headers["anthropic-beta"] + + def test_context_management_mixed_edits_beta_headers(self): + """Test that context_management with both compact and other edits adds both beta headers""" + config = AzureAnthropicConfig() + + messages = [{"role": "user", "content": "Hello"}] + optional_params = { + "context_management": { + "edits": [ + { + "type": "compact_20260112" + }, + { + "type": "replace", + "message_id": "msg_123", + "content": "new content" + } + ] + }, + "max_tokens": 100 + } + + headers = {} + headers = config.update_headers_with_optional_anthropic_beta( + headers=headers, + optional_params=optional_params + ) + + # Verify both beta headers are present + assert "anthropic-beta" in headers + assert "compact-2026-01-12" in 
headers["anthropic-beta"] + assert "context-management-2025-06-27" in headers["anthropic-beta"] + diff --git a/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py b/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py index 3e6c6f6740c..90ab41aadf6 100644 --- a/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py +++ b/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py @@ -6,6 +6,9 @@ sys.path.insert( 0, os.path.abspath("../../../../../..") ) # Adds the parent directory to the system path +from litellm.anthropic_beta_headers_manager import ( + update_headers_with_filtered_beta, +) from litellm.llms.vertex_ai.vertex_ai_partner_models.anthropic.transformation import ( VertexAIAnthropicConfig, ) @@ -75,6 +78,76 @@ def test_vertex_ai_anthropic_web_search_header_in_completion(): "anthropic-beta with web-search should not be present for non-Vertex requests" +def test_vertex_ai_anthropic_context_management_compact_beta_header(): + """Test that context_management with compact adds the correct beta header for Vertex AI""" + config = VertexAIAnthropicConfig() + + messages = [{"role": "user", "content": "Hello"}] + optional_params = { + "context_management": { + "edits": [ + { + "type": "compact_20260112" + } + ] + }, + "max_tokens": 100, + "is_vertex_request": True + } + + result = config.transform_request( + model="claude-opus-4-6", + messages=messages, + optional_params=optional_params, + litellm_params={}, + headers={} + ) + + # Verify context_management is included + assert "context_management" in result + assert result["context_management"]["edits"][0]["type"] == "compact_20260112" + + # Verify compact beta header is in anthropic_beta field + assert "anthropic_beta" in result 
+ assert "compact-2026-01-12" in result["anthropic_beta"] + + +def test_vertex_ai_anthropic_context_management_mixed_edits(): + """Test that context_management with both compact and other edits adds both beta headers""" + config = VertexAIAnthropicConfig() + + messages = [{"role": "user", "content": "Hello"}] + optional_params = { + "context_management": { + "edits": [ + { + "type": "compact_20260112" + }, + { + "type": "replace", + "message_id": "msg_123", + "content": "new content" + } + ] + }, + "max_tokens": 100, + "is_vertex_request": True + } + + result = config.transform_request( + model="claude-opus-4-6", + messages=messages, + optional_params=optional_params, + litellm_params={}, + headers={} + ) + + # Verify both beta headers are present + assert "anthropic_beta" in result + assert "compact-2026-01-12" in result["anthropic_beta"] + assert "context-management-2025-06-27" in result["anthropic_beta"] + + def test_vertex_ai_anthropic_structured_output_header_not_added(): """Test that structured output beta headers are NOT added for Vertex AI requests""" from litellm.llms.anthropic.chat.transformation import AnthropicConfig @@ -276,8 +349,7 @@ def test_vertex_ai_partner_models_anthropic_remove_prompt_caching_scope_beta_hea "anthropic-beta": f"other-feature,{PROMPT_CACHING_BETA_HEADER},web-search-2025-03-05" } - config = VertexAIPartnerModelsAnthropicMessagesConfig() - config.remove_unsupported_beta(headers) + headers = update_headers_with_filtered_beta(headers, "vertex_ai") beta_header = headers.get("anthropic-beta") assert PROMPT_CACHING_BETA_HEADER not in (beta_header or ""), \ @@ -288,5 +360,5 @@ def test_vertex_ai_partner_models_anthropic_remove_prompt_caching_scope_beta_hea "Other non-excluded beta headers should remain" # If prompt-caching was the only value, header should be removed completely headers2 = {"anthropic-beta": PROMPT_CACHING_BETA_HEADER} - config.remove_unsupported_beta(headers2) + headers2 = update_headers_with_filtered_beta(headers2, 
"vertex_ai") assert "anthropic-beta" not in headers2, "Header should be removed if no supported values remain" \ No newline at end of file diff --git a/tests/test_litellm/proxy/spend_tracking/test_spend_management_endpoints.py b/tests/test_litellm/proxy/spend_tracking/test_spend_management_endpoints.py index 13368d0a142..54a276bc97f 100644 --- a/tests/test_litellm/proxy/spend_tracking/test_spend_management_endpoints.py +++ b/tests/test_litellm/proxy/spend_tracking/test_spend_management_endpoints.py @@ -205,6 +205,7 @@ def test_can_user_view_spend_log_false_for_other_roles(): "metadata.additional_usage_values.prompt_tokens_details", "metadata.additional_usage_values.cache_creation_input_tokens", "metadata.additional_usage_values.cache_read_input_tokens", + "metadata.additional_usage_values.inference_geo", "metadata.litellm_overhead_time_ms", "metadata.cost_breakdown", ] diff --git a/tests/test_litellm/test_anthropic_beta_headers_manager.py b/tests/test_litellm/test_anthropic_beta_headers_manager.py new file mode 100644 index 00000000000..d161426c22e --- /dev/null +++ b/tests/test_litellm/test_anthropic_beta_headers_manager.py @@ -0,0 +1,306 @@ +""" +Tests for the centralized Anthropic beta headers manager. + +Design: JSON config lists UNSUPPORTED headers for each provider. +Headers not in the unsupported list are passed through. +Header transformations (e.g., advanced-tool-use -> tool-search-tool) happen in code, not in JSON. 
+""" + +import pytest + +from litellm.anthropic_beta_headers_manager import ( + filter_and_transform_beta_headers, + get_provider_beta_header, + get_provider_name, + get_unsupported_headers, + is_beta_header_supported, + update_headers_with_filtered_beta, +) + + +class TestProviderNameResolution: + """Test provider name resolution and aliases.""" + + def test_get_provider_name_direct(self): + """Test direct provider names.""" + assert get_provider_name("anthropic") == "anthropic" + assert get_provider_name("bedrock") == "bedrock" + assert get_provider_name("vertex_ai") == "vertex_ai" + assert get_provider_name("azure_ai") == "azure_ai" + + def test_get_provider_name_alias(self): + """Test provider aliases.""" + # Note: Aliases are defined in the JSON config + # If no alias exists, the original name is returned + assert get_provider_name("azure") == "azure" # No alias defined + assert get_provider_name("vertex_ai_beta") == "vertex_ai_beta" # No alias defined + + +class TestBetaHeaderSupport: + """Test beta header support checks (unsupported list approach).""" + + def test_anthropic_supports_all_headers(self): + """Anthropic should support all beta headers (empty unsupported list).""" + headers = [ + "web-fetch-2025-09-10", + "web-search-2025-03-05", + "context-management-2025-06-27", + "compact-2026-01-12", + "structured-outputs-2025-11-13", + "advanced-tool-use-2025-11-20", + ] + for header in headers: + assert is_beta_header_supported(header, "anthropic") + + def test_bedrock_unsupported_headers(self): + """Bedrock should block specific headers.""" + # Not supported (in unsupported list) + assert not is_beta_header_supported("advanced-tool-use-2025-11-20", "bedrock") + assert not is_beta_header_supported( + "prompt-caching-scope-2026-01-05", "bedrock" + ) + assert not is_beta_header_supported("structured-outputs-2025-11-13", "bedrock") + + # Supported (not in unsupported list) + assert is_beta_header_supported("context-management-2025-06-27", "bedrock") + assert 
is_beta_header_supported("effort-2025-11-24", "bedrock") + assert is_beta_header_supported("tool-examples-2025-10-29", "bedrock") + + def test_vertex_ai_unsupported_headers(self): + """Vertex AI should block specific headers.""" + # Not supported (in unsupported list) + assert not is_beta_header_supported( + "prompt-caching-scope-2026-01-05", "vertex_ai" + ) + + # Supported (not in unsupported list) + assert is_beta_header_supported("web-search-2025-03-05", "vertex_ai") + assert is_beta_header_supported("context-management-2025-06-27", "vertex_ai") + assert is_beta_header_supported("effort-2025-11-24", "vertex_ai") + assert is_beta_header_supported("advanced-tool-use-2025-11-20", "vertex_ai") + + +class TestBetaHeaderTransformation: + """Test beta header support checking (transformations happen in code, not here).""" + + def test_anthropic_no_transformation(self): + """Anthropic headers should pass through (empty unsupported list).""" + header = "advanced-tool-use-2025-11-20" + assert get_provider_beta_header(header, "anthropic") == header + + def test_bedrock_unsupported_returns_none(self): + """Bedrock should return None for unsupported headers.""" + header = "advanced-tool-use-2025-11-20" + # This header is in bedrock's unsupported list + assert get_provider_beta_header(header, "bedrock") is None + + def test_vertex_ai_supported_returns_original(self): + """Vertex AI should return original for supported headers.""" + header = "advanced-tool-use-2025-11-20" + # This header is NOT in vertex_ai's unsupported list + assert get_provider_beta_header(header, "vertex_ai") == header + + def test_unsupported_header_returns_none(self): + """Unsupported headers (in unsupported list) should return None.""" + header = "prompt-caching-scope-2026-01-05" + assert get_provider_beta_header(header, "bedrock") is None + + def test_supported_header_returns_original(self): + """Supported headers (not in unsupported list) should return original.""" + header = 
"context-management-2025-06-27" + assert get_provider_beta_header(header, "bedrock") == header + + +class TestFilterAndTransformBetaHeaders: + """Test the main filtering and transformation function.""" + + def test_anthropic_keeps_all_headers(self): + """Anthropic should keep all headers (empty unsupported list).""" + headers = [ + "web-fetch-2025-09-10", + "context-management-2025-06-27", + "structured-outputs-2025-11-13", + "some-new-future-header-2026-01-01", # Even unknown headers pass through + ] + result = filter_and_transform_beta_headers(headers, "anthropic") + assert set(result) == set(headers) + + def test_bedrock_filters_unsupported(self): + """Bedrock should filter out headers in unsupported list.""" + headers = [ + "context-management-2025-06-27", # Not in unsupported list -> kept + "advanced-tool-use-2025-11-20", # In unsupported list -> dropped + "structured-outputs-2025-11-13", # In unsupported list -> dropped + "prompt-caching-scope-2026-01-05", # In unsupported list -> dropped + ] + result = filter_and_transform_beta_headers(headers, "bedrock") + assert "context-management-2025-06-27" in result + assert "advanced-tool-use-2025-11-20" not in result + assert "structured-outputs-2025-11-13" not in result + assert "prompt-caching-scope-2026-01-05" not in result + + def test_bedrock_no_transformations_in_filter(self): + """Bedrock filtering doesn't do transformations (those happen in code).""" + headers = ["advanced-tool-use-2025-11-20"] + result = filter_and_transform_beta_headers(headers, "bedrock") + # advanced-tool-use is in unsupported list, so it gets dropped + assert result == [] + + def test_vertex_ai_filters_unsupported(self): + """Vertex AI should filter unsupported headers.""" + headers = [ + "web-search-2025-03-05", # Not in unsupported list -> kept + "advanced-tool-use-2025-11-20", # Not in unsupported list -> kept + "prompt-caching-scope-2026-01-05", # In unsupported list -> dropped + ] + result = 
filter_and_transform_beta_headers(headers, "vertex_ai")
+        assert "web-search-2025-03-05" in result
+        assert "advanced-tool-use-2025-11-20" in result  # Kept as-is, transformation happens in code
+        assert "prompt-caching-scope-2026-01-05" not in result
+
+    def test_empty_list_returns_empty(self):
+        """Empty list should return empty list."""
+        result = filter_and_transform_beta_headers([], "anthropic")
+        assert result == []
+
+    def test_bedrock_converse_more_restrictive(self):
+        """Bedrock Converse should be more restrictive than Bedrock."""
+        headers = [
+            "context-management-2025-06-27",
+            "advanced-tool-use-2025-11-20",
+            "tool-examples-2025-10-29",
+        ]
+
+        bedrock_result = filter_and_transform_beta_headers(headers, "bedrock")
+        converse_result = filter_and_transform_beta_headers(headers, "bedrock_converse")
+
+        # Bedrock Converse has more restrictions
+        # advanced-tool-use is in both unsupported lists
+        assert "advanced-tool-use-2025-11-20" not in bedrock_result
+        assert "advanced-tool-use-2025-11-20" not in converse_result
+
+        # tool-examples is absent from bedrock's unsupported list, so it is
+        # kept for bedrock; it is also absent from the converse unsupported
+        # list, so it passes through there as well
+        assert "tool-examples-2025-10-29" in bedrock_result
+        # context-management is not in the unsupported list for either
+        # provider, so both keep it
+        assert "context-management-2025-06-27" in bedrock_result
+        assert "context-management-2025-06-27" in converse_result
+
+    def test_unknown_future_headers_pass_through(self):
+        """Headers not in unsupported list should pass through (future-proof)."""
+        headers = ["some-new-beta-2026-05-01", "another-feature-2026-06-01"]
+        result = filter_and_transform_beta_headers(headers, "anthropic")
+        assert set(result) == set(headers)
+
+
+class TestUpdateHeadersWithFilteredBeta:
+    """Test the headers update function."""
+
+    def test_update_headers_anthropic(self):
+        """Test updating headers for 
Anthropic.""" + headers = { + "anthropic-beta": "web-fetch-2025-09-10,context-management-2025-06-27" + } + result = update_headers_with_filtered_beta(headers, "anthropic") + assert "anthropic-beta" in result + beta_values = set(result["anthropic-beta"].split(",")) + assert "web-fetch-2025-09-10" in beta_values + assert "context-management-2025-06-27" in beta_values + + def test_update_headers_bedrock_filters(self): + """Test updating headers for Bedrock with filtering.""" + headers = { + "anthropic-beta": "context-management-2025-06-27,advanced-tool-use-2025-11-20" + } + result = update_headers_with_filtered_beta(headers, "bedrock") + assert "anthropic-beta" in result + assert "context-management-2025-06-27" in result["anthropic-beta"] + assert "advanced-tool-use-2025-11-20" not in result["anthropic-beta"] + + def test_update_headers_bedrock_no_transformations(self): + """Test that filtering doesn't do transformations (those happen in code).""" + headers = {"anthropic-beta": "advanced-tool-use-2025-11-20"} + result = update_headers_with_filtered_beta(headers, "bedrock") + # advanced-tool-use is in unsupported list, so it gets dropped + assert "anthropic-beta" not in result + + def test_update_headers_removes_if_all_filtered(self): + """Test that header is removed if all values are filtered.""" + headers = {"anthropic-beta": "advanced-tool-use-2025-11-20,prompt-caching-scope-2026-01-05"} + result = update_headers_with_filtered_beta(headers, "bedrock") + assert "anthropic-beta" not in result + + def test_update_headers_no_beta_header(self): + """Test updating headers when no beta header exists.""" + headers = {"content-type": "application/json"} + result = update_headers_with_filtered_beta(headers, "anthropic") + assert "anthropic-beta" not in result + assert headers == result + + +class TestGetUnsupportedHeaders: + """Test getting unsupported headers for a provider.""" + + def test_anthropic_has_no_unsupported(self): + """Anthropic should have no unsupported headers 
(empty list).""" + anthropic_unsupported = get_unsupported_headers("anthropic") + assert len(anthropic_unsupported) == 0 + + def test_bedrock_converse_most_restrictive(self): + """Bedrock Converse should have more unsupported headers than Bedrock.""" + bedrock_unsupported = get_unsupported_headers("bedrock") + converse_unsupported = get_unsupported_headers("bedrock_converse") + # Converse has more restrictions + assert len(converse_unsupported) >= len(bedrock_unsupported) + + def test_all_providers_have_config(self): + """All providers should have a configuration entry.""" + providers = ["anthropic", "azure_ai", "bedrock", "bedrock_converse", "vertex_ai"] + for provider in providers: + unsupported = get_unsupported_headers(provider) + # Should return a list (even if empty) + assert isinstance(unsupported, list), f"Provider {provider} should return a list" + + +class TestEdgeCases: + """Test edge cases and error handling.""" + + def test_unknown_provider(self): + """Unknown provider with no config should pass through all headers.""" + result = filter_and_transform_beta_headers( + ["context-management-2025-06-27"], "unknown_provider" + ) + # Unknown providers have no unsupported list, so headers pass through + assert "context-management-2025-06-27" in result + + def test_whitespace_handling(self): + """Headers with whitespace should be handled correctly.""" + headers = [ + " context-management-2025-06-27 ", + " web-search-2025-03-05 ", + ] + result = filter_and_transform_beta_headers(headers, "anthropic") + assert len(result) == 2 + + def test_duplicate_headers(self): + """Duplicate headers should be deduplicated.""" + headers = [ + "context-management-2025-06-27", + "context-management-2025-06-27", + ] + result = filter_and_transform_beta_headers(headers, "anthropic") + assert len(result) == 1 + + def test_case_sensitivity(self): + """Headers should be case-sensitive.""" + # Correct case - should pass through for anthropic (no unsupported list) + headers = 
["context-management-2025-06-27"] + result = filter_and_transform_beta_headers(headers, "anthropic") + assert len(result) == 1 + + # Wrong case - should still pass through (not in unsupported list) + headers = ["Context-Management-2025-06-27"] + result = filter_and_transform_beta_headers(headers, "anthropic") + assert len(result) == 1 # Passes through because anthropic has empty unsupported list diff --git a/tests/test_litellm/test_claude_opus_4_6_config.py b/tests/test_litellm/test_claude_opus_4_6_config.py index 8c00ad4b36f..071d0a26369 100644 --- a/tests/test_litellm/test_claude_opus_4_6_config.py +++ b/tests/test_litellm/test_claude_opus_4_6_config.py @@ -26,12 +26,6 @@ def test_opus_4_6_model_pricing_and_capabilities(): "tool_use_system_prompt_tokens": 346, "max_input_tokens": 1000000, }, - "anthropic.claude-opus-4-6-v1:0": { - "provider": "bedrock_converse", - "has_long_context_pricing": True, - "tool_use_system_prompt_tokens": 346, - "max_input_tokens": 1000000, - }, "anthropic.claude-opus-4-6-v1": { "provider": "bedrock_converse", "has_long_context_pricing": True, @@ -88,16 +82,6 @@ def test_opus_4_6_bedrock_regional_model_pricing(): model_data = json.load(f) expected_models = { - "global.anthropic.claude-opus-4-6-v1:0": { - "input_cost_per_token": 5e-06, - "output_cost_per_token": 2.5e-05, - "cache_creation_input_token_cost": 6.25e-06, - "cache_read_input_token_cost": 5e-07, - "input_cost_per_token_above_200k_tokens": 1e-05, - "output_cost_per_token_above_200k_tokens": 3.75e-05, - "cache_creation_input_token_cost_above_200k_tokens": 1.25e-05, - "cache_read_input_token_cost_above_200k_tokens": 1e-06, - }, "global.anthropic.claude-opus-4-6-v1": { "input_cost_per_token": 5e-06, "output_cost_per_token": 2.5e-05, @@ -108,16 +92,6 @@ def test_opus_4_6_bedrock_regional_model_pricing(): "cache_creation_input_token_cost_above_200k_tokens": 1.25e-05, "cache_read_input_token_cost_above_200k_tokens": 1e-06, }, - "us.anthropic.claude-opus-4-6-v1:0": { - 
"input_cost_per_token": 5.5e-06, - "output_cost_per_token": 2.75e-05, - "cache_creation_input_token_cost": 6.875e-06, - "cache_read_input_token_cost": 5.5e-07, - "input_cost_per_token_above_200k_tokens": 1.1e-05, - "output_cost_per_token_above_200k_tokens": 4.125e-05, - "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05, - "cache_read_input_token_cost_above_200k_tokens": 1.1e-06, - }, "us.anthropic.claude-opus-4-6-v1": { "input_cost_per_token": 5.5e-06, "output_cost_per_token": 2.75e-05, @@ -128,16 +102,6 @@ def test_opus_4_6_bedrock_regional_model_pricing(): "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05, "cache_read_input_token_cost_above_200k_tokens": 1.1e-06, }, - "eu.anthropic.claude-opus-4-6-v1:0": { - "input_cost_per_token": 5.5e-06, - "output_cost_per_token": 2.75e-05, - "cache_creation_input_token_cost": 6.875e-06, - "cache_read_input_token_cost": 5.5e-07, - "input_cost_per_token_above_200k_tokens": 1.1e-05, - "output_cost_per_token_above_200k_tokens": 4.125e-05, - "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05, - "cache_read_input_token_cost_above_200k_tokens": 1.1e-06, - }, "eu.anthropic.claude-opus-4-6-v1": { "input_cost_per_token": 5.5e-06, "output_cost_per_token": 2.75e-05, @@ -148,7 +112,7 @@ def test_opus_4_6_bedrock_regional_model_pricing(): "cache_creation_input_token_cost_above_200k_tokens": 1.375e-05, "cache_read_input_token_cost_above_200k_tokens": 1.1e-06, }, - "apac.anthropic.claude-opus-4-6-v1:0": { + "apac.anthropic.claude-opus-4-6-v1": { "input_cost_per_token": 5.5e-06, "output_cost_per_token": 2.75e-05, "cache_creation_input_token_cost": 6.875e-06, @@ -212,14 +176,8 @@ def test_opus_4_6_alias_and_dated_metadata_match(): def test_opus_4_6_bedrock_converse_registration(): - assert "anthropic.claude-opus-4-6-v1:0" in litellm.BEDROCK_CONVERSE_MODELS assert "anthropic.claude-opus-4-6-v1" in litellm.BEDROCK_CONVERSE_MODELS - assert "anthropic.claude-opus-4-6-v1" in litellm.bedrock_converse_models - 
assert "global.anthropic.claude-opus-4-6-v1:0" in litellm.bedrock_converse_models assert "global.anthropic.claude-opus-4-6-v1" in litellm.bedrock_converse_models - assert "us.anthropic.claude-opus-4-6-v1:0" in litellm.bedrock_converse_models assert "us.anthropic.claude-opus-4-6-v1" in litellm.bedrock_converse_models - assert "eu.anthropic.claude-opus-4-6-v1:0" in litellm.bedrock_converse_models assert "eu.anthropic.claude-opus-4-6-v1" in litellm.bedrock_converse_models - assert "apac.anthropic.claude-opus-4-6-v1:0" in litellm.bedrock_converse_models assert "apac.anthropic.claude-opus-4-6-v1" in litellm.bedrock_converse_models