diff --git a/litellm/llms/azure_ai/anthropic/messages_transformation.py b/litellm/llms/azure_ai/anthropic/messages_transformation.py
index a4dc88f9c6..8e60e84391 100644
--- a/litellm/llms/azure_ai/anthropic/messages_transformation.py
+++ b/litellm/llms/azure_ai/anthropic/messages_transformation.py
@@ -1,7 +1,7 @@
 """
 Azure Anthropic messages transformation config - extends AnthropicMessagesConfig with Azure authentication
 """
-from typing import TYPE_CHECKING, Any, List, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
 
 from litellm.llms.anthropic.experimental_pass_through.messages.transformation import (
     AnthropicMessagesConfig,
@@ -114,3 +114,53 @@ def get_complete_url(
 
         return api_base
 
+    def _remove_scope_from_cache_control(
+        self, anthropic_messages_request: Dict
+    ) -> None:
+        """
+        Remove `scope` field from cache_control for Azure AI Foundry.
+
+        Azure AI Foundry's Anthropic endpoint does not support the `scope` field
+        (e.g., "global" for cross-request caching). Only `type` and `ttl` are supported.
+
+        Processes both `system` and `messages` content blocks.
+        """
+        def _sanitize(cache_control: Any) -> None:
+            if isinstance(cache_control, dict):
+                cache_control.pop("scope", None)
+
+        def _process_content_list(content: list) -> None:
+            for item in content:
+                if isinstance(item, dict) and "cache_control" in item:
+                    _sanitize(item["cache_control"])
+
+        if "system" in anthropic_messages_request:
+            system = anthropic_messages_request["system"]
+            if isinstance(system, list):
+                _process_content_list(system)
+
+        if "messages" in anthropic_messages_request:
+            for message in anthropic_messages_request["messages"]:
+                if isinstance(message, dict) and "content" in message:
+                    content = message["content"]
+                    if isinstance(content, list):
+                        _process_content_list(content)
+
+    def transform_anthropic_messages_request(
+        self,
+        model: str,
+        messages: List[Dict],
+        anthropic_messages_optional_request_params: Dict,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+    ) -> Dict:
+        anthropic_messages_request = super().transform_anthropic_messages_request(
+            model=model,
+            messages=messages,
+            anthropic_messages_optional_request_params=anthropic_messages_optional_request_params,
+            litellm_params=litellm_params,
+            headers=headers,
+        )
+        self._remove_scope_from_cache_control(anthropic_messages_request)
+        return anthropic_messages_request
+
diff --git a/litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py b/litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py
index 03885ff208..f0aa643b34 100644
--- a/litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py
+++ b/litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py
@@ -118,10 +118,13 @@ def _remove_ttl_from_cache_control(
         self, anthropic_messages_request: Dict, model: Optional[str] = None
     ) -> None:
         """
-        Remove `ttl` field from cache_control in messages.
-        Bedrock doesn't support the ttl field in cache_control.
+        Remove unsupported fields from cache_control for Bedrock.
 
-        Update: Bedock supports `5m` and `1h` for Claude 4.5 models.
+        Bedrock accepts `type` and, for Claude 4.5 models only, `ttl` in cache_control:
+        - `scope` (e.g., "global") - always removed
+        - `ttl` - removed unless the model is Claude 4.5 and the value is "5m" or "1h"
+
+        Processes both `system` and `messages` content blocks.
 
         Args:
             anthropic_messages_request: The request dictionary to modify in-place
@@ -131,23 +134,36 @@ def _remove_ttl_from_cache_control(
         if model:
             is_claude_4_5 = self._is_claude_4_5_on_bedrock(model)
 
+        def _sanitize_cache_control(cache_control: dict) -> None:
+            if not isinstance(cache_control, dict):
+                return
+            # Bedrock doesn't support scope (e.g., "global" for cross-request caching)
+            cache_control.pop("scope", None)
+            # Remove ttl for models that don't support it
+            if "ttl" in cache_control:
+                ttl = cache_control["ttl"]
+                if is_claude_4_5 and ttl in ["5m", "1h"]:
+                    return
+                cache_control.pop("ttl", None)
+
+        def _process_content_list(content: list) -> None:
+            for item in content:
+                if isinstance(item, dict) and "cache_control" in item:
+                    _sanitize_cache_control(item["cache_control"])
+
+        # Process system (list of content blocks)
+        if "system" in anthropic_messages_request:
+            system = anthropic_messages_request["system"]
+            if isinstance(system, list):
+                _process_content_list(system)
+
+        # Process messages
         if "messages" in anthropic_messages_request:
             for message in anthropic_messages_request["messages"]:
                 if isinstance(message, dict) and "content" in message:
                     content = message["content"]
                     if isinstance(content, list):
-                        for item in content:
-                            if isinstance(item, dict) and "cache_control" in item:
-                                cache_control = item["cache_control"]
-                                if (
-                                    isinstance(cache_control, dict)
-                                    and "ttl" in cache_control
-                                ):
-                                    ttl = cache_control["ttl"]
-                                    if is_claude_4_5 and ttl in ["5m", "1h"]:
-                                        continue
-
-                                    cache_control.pop("ttl", None)
+                        _process_content_list(content)
 
     def _supports_extended_thinking_on_bedrock(self, model: str) -> bool:
         """
diff --git a/tests/test_litellm/llms/azure_ai/claude/test_azure_anthropic_messages_transformation.py b/tests/test_litellm/llms/azure_ai/claude/test_azure_anthropic_messages_transformation.py
index bdced849c7..83653bc037 100644
--- a/tests/test_litellm/llms/azure_ai/claude/test_azure_anthropic_messages_transformation.py
+++ b/tests/test_litellm/llms/azure_ai/claude/test_azure_anthropic_messages_transformation.py
@@ -239,6 +239,50 @@ def test_get_supported_anthropic_messages_params(self):
         assert "tools" in params
         assert "tool_choice" in params
 
+    def test_transform_anthropic_messages_request_removes_scope_from_cache_control(
+        self,
+    ):
+        """Test that scope is removed from cache_control (Azure AI Foundry doesn't support it)"""
+        config = AzureAnthropicMessagesConfig()
+        model = "claude-sonnet-4-5"
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "Hello",
+                        "cache_control": {"type": "ephemeral", "scope": "global"},
+                    }
+                ],
+            }
+        ]
+        anthropic_messages_optional_request_params = {
+            "max_tokens": 1024,
+            "system": [
+                {
+                    "type": "text",
+                    "text": "You are an AI assistant.",
+                    "cache_control": {"type": "ephemeral", "scope": "global"},
+                }
+            ],
+        }
+        litellm_params = GenericLiteLLMParams()
+        headers = {}
+
+        result = config.transform_anthropic_messages_request(
+            model=model,
+            messages=messages,
+            anthropic_messages_optional_request_params=anthropic_messages_optional_request_params,
+            litellm_params=litellm_params,
+            headers=headers,
+        )
+
+        assert "scope" not in result["system"][0]["cache_control"]
+        assert result["system"][0]["cache_control"]["type"] == "ephemeral"
+        assert "scope" not in result["messages"][0]["content"][0]["cache_control"]
+        assert result["messages"][0]["content"][0]["cache_control"]["type"] == "ephemeral"
+
 
 class TestProviderConfigManagerAzureAnthropicMessages:
     """Test ProviderConfigManager returns correct config for Azure AI Anthropic Messages API"""
diff --git a/tests/test_litellm/llms/bedrock/messages/invoke_transformations/test_anthropic_claude3_transformation.py b/tests/test_litellm/llms/bedrock/messages/invoke_transformations/test_anthropic_claude3_transformation.py
index a4da4ebb68..ee4c7828c3 100644
--- a/tests/test_litellm/llms/bedrock/messages/invoke_transformations/test_anthropic_claude3_transformation.py
+++ b/tests/test_litellm/llms/bedrock/messages/invoke_transformations/test_anthropic_claude3_transformation.py
@@ -178,3 +178,48 @@ def test_remove_ttl_from_cache_control():
     request5 = {}
     cfg._remove_ttl_from_cache_control(request5)
     assert request5 == {}
+
+
+def test_remove_scope_from_cache_control():
+    """Ensure scope field is removed from cache_control for Bedrock (not supported)."""
+
+    cfg = AmazonAnthropicClaudeMessagesConfig()
+
+    # Test case 1: System with cache_control containing scope
+    request = {
+        "system": [
+            {
+                "type": "text",
+                "text": "You are an AI assistant.",
+                "cache_control": {
+                    "type": "ephemeral",
+                    "scope": "global",
+                },
+            }
+        ],
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "Hello",
+                        "cache_control": {
+                            "type": "ephemeral",
+                            "scope": "global",
+                        },
+                    }
+                ],
+            }
+        ],
+    }
+
+    cfg._remove_ttl_from_cache_control(request)
+
+    # Verify scope is removed from system
+    assert "scope" not in request["system"][0]["cache_control"]
+    assert request["system"][0]["cache_control"]["type"] == "ephemeral"
+
+    # Verify scope is removed from messages
+    assert "scope" not in request["messages"][0]["content"][0]["cache_control"]
+    assert request["messages"][0]["content"][0]["cache_control"]["type"] == "ephemeral"