diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py b/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py index 790e7901960..f67e4c8382c 100644 --- a/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py +++ b/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py @@ -2,7 +2,8 @@ import httpx -from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj, verbose_logger +from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj +from litellm.litellm_core_utils.litellm_logging import verbose_logger from litellm.llms.base_llm.anthropic_messages.transformation import ( BaseAnthropicMessagesConfig, ) @@ -13,9 +14,10 @@ from litellm.types.llms.anthropic_messages.anthropic_response import ( AnthropicMessagesResponse, ) +from litellm.types.llms.anthropic_tool_search import get_tool_search_beta_header from litellm.types.router import GenericLiteLLMParams -from ...common_utils import AnthropicError +from ...common_utils import AnthropicError, AnthropicModelInfo DEFAULT_ANTHROPIC_API_BASE = "https://api.anthropic.com" DEFAULT_ANTHROPIC_API_VERSION = "2023-06-01" @@ -75,9 +77,9 @@ def validate_anthropic_messages_environment( if "content-type" not in headers: headers["content-type"] = "application/json" - headers = self._update_headers_with_optional_anthropic_beta( + headers = self._update_headers_with_anthropic_beta( headers=headers, - context_management=optional_params.get("context_management"), + optional_params=optional_params, ) return headers, api_base @@ -153,16 +155,44 @@ def get_async_streaming_response_iterator( ) @staticmethod - def _update_headers_with_optional_anthropic_beta( - headers: dict, context_management: Optional[Dict] + def _update_headers_with_anthropic_beta( + headers: dict, + optional_params: dict, + custom_llm_provider: str = "anthropic", ) -> dict: - if context_management is None: - return headers - + """ + Auto-inject anthropic-beta headers based on features used. + + Handles: + - context_management: adds 'context-management-2025-06-27' + - tool_search: adds provider-specific tool search header + + Args: + headers: Request headers dict + optional_params: Optional parameters including tools, context_management + custom_llm_provider: Provider name for looking up correct tool search header + """ + beta_values: set = set() + + # Get existing beta headers if any existing_beta = headers.get("anthropic-beta") - beta_value = ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value - if existing_beta is None: - headers["anthropic-beta"] = beta_value - elif beta_value not in [beta.strip() for beta in existing_beta.split(",")]: - headers["anthropic-beta"] = f"{existing_beta}, {beta_value}" + if existing_beta: + beta_values.update(b.strip() for b in existing_beta.split(",")) + + # Check for context management + if optional_params.get("context_management") is not None: + beta_values.add(ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value) + + # Check for tool search tools + tools = optional_params.get("tools") + if tools: + anthropic_model_info = AnthropicModelInfo() + if anthropic_model_info.is_tool_search_used(tools): + # Use provider-specific tool search header + tool_search_header = get_tool_search_beta_header(custom_llm_provider) + beta_values.add(tool_search_header) + + if beta_values: + headers["anthropic-beta"] = ",".join(sorted(beta_values)) + return headers diff --git a/litellm/llms/azure_ai/anthropic/messages_transformation.py b/litellm/llms/azure_ai/anthropic/messages_transformation.py index 55818cc07d6..0d00c907031 100644 --- a/litellm/llms/azure_ai/anthropic/messages_transformation.py +++ b/litellm/llms/azure_ai/anthropic/messages_transformation.py @@ -62,10 +62,10 @@ def validate_anthropic_messages_environment( if "content-type" not in headers: headers["content-type"] = "application/json" - # Update headers with optional anthropic beta features - headers = self._update_headers_with_optional_anthropic_beta( + # Update headers with anthropic beta features (context management, tool search, etc.) + headers = self._update_headers_with_anthropic_beta( headers=headers, - context_management=optional_params.get("context_management"), + optional_params=optional_params, ) return headers, api_base diff --git a/litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py b/litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py index 81225159a7c..fa5002fcad8 100644 --- a/litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py +++ b/litellm/llms/bedrock/messages/invoke_transformations/anthropic_claude3_transformation.py @@ -129,6 +129,37 @@ def _remove_ttl_from_cache_control( if isinstance(cache_control, dict) and "ttl" in cache_control: cache_control.pop("ttl", None) + def _get_tool_search_beta_header_for_bedrock( + self, + model: str, + tool_search_used: bool, + programmatic_tool_calling_used: bool, + input_examples_used: bool, + beta_set: set, + ) -> None: + """ + Adjust tool search beta header for Bedrock. + + Bedrock requires a different beta header for tool search on Opus 4 models + when tool search is used without programmatic tool calling or input examples. + + Note: On Amazon Bedrock, server-side tool search is only supported on Claude Opus 4 + with the `tool-search-tool-2025-10-19` beta header. + + Ref: https://platform.claude.com/docs/en/agents-and-tools/tool-use/tool-search-tool + + Args: + model: The model name + tool_search_used: Whether tool search is used + programmatic_tool_calling_used: Whether programmatic tool calling is used + input_examples_used: Whether input examples are used + beta_set: The set of beta headers to modify in-place + """ + if tool_search_used and not (programmatic_tool_calling_used or input_examples_used): + beta_set.discard(ANTHROPIC_TOOL_SEARCH_BETA_HEADER) + if "opus-4" in model.lower() or "opus_4" in model.lower(): + beta_set.add("tool-search-tool-2025-10-19") + def transform_anthropic_messages_request( self, model: str, @@ -189,13 +220,13 @@ def transform_anthropic_messages_request( ) beta_set.update(auto_betas) - if ( - tool_search_used - and not (programmatic_tool_calling_used or input_examples_used) - ): - beta_set.discard(ANTHROPIC_TOOL_SEARCH_BETA_HEADER) - if "opus-4" in model.lower() or "opus_4" in model.lower(): - beta_set.add("tool-search-tool-2025-10-19") + self._get_tool_search_beta_header_for_bedrock( + model=model, + tool_search_used=tool_search_used, + programmatic_tool_calling_used=programmatic_tool_calling_used, + input_examples_used=input_examples_used, + beta_set=beta_set, + ) if beta_set: anthropic_messages_request["anthropic_beta"] = list(beta_set) diff --git a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py index c22072af2f3..0bedef3276b 100644 --- a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py +++ b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py @@ -1,11 +1,16 @@ from typing import Any, Dict, List, Optional, Tuple +from litellm.llms.anthropic.common_utils import AnthropicModelInfo from litellm.llms.anthropic.experimental_pass_through.messages.transformation import ( AnthropicMessagesConfig, ) +from litellm.types.llms.anthropic import ( + ANTHROPIC_BETA_HEADER_VALUES, + ANTHROPIC_HOSTED_TOOLS, +) +from litellm.types.llms.anthropic_tool_search import get_tool_search_beta_header from litellm.types.llms.vertex_ai import VertexPartnerProvider from litellm.types.router import GenericLiteLLMParams -from litellm.types.llms.anthropic import ANTHROPIC_BETA_HEADER_VALUES, ANTHROPIC_HOSTED_TOOLS from ....vertex_llm_base import VertexBase @@ -51,13 +56,28 @@ def validate_anthropic_messages_environment( headers["content-type"] = "application/json" - # Add web search beta header for Vertex AI only if not already set - if "anthropic-beta" not in headers: - tools = optional_params.get("tools", []) - for tool in tools: - if isinstance(tool, dict) and tool.get("type", "").startswith(ANTHROPIC_HOSTED_TOOLS.WEB_SEARCH.value): - headers["anthropic-beta"] = ANTHROPIC_BETA_HEADER_VALUES.WEB_SEARCH_2025_03_05.value - break + # Add beta headers for Vertex AI + tools = optional_params.get("tools", []) + beta_values: set[str] = set() + + # Get existing beta headers if any + existing_beta = headers.get("anthropic-beta") + if existing_beta: + beta_values.update(b.strip() for b in existing_beta.split(",")) + + # Check for web search tool + for tool in tools: + if isinstance(tool, dict) and tool.get("type", "").startswith(ANTHROPIC_HOSTED_TOOLS.WEB_SEARCH.value): + beta_values.add(ANTHROPIC_BETA_HEADER_VALUES.WEB_SEARCH_2025_03_05.value) + break + + # Check for tool search tools - Vertex AI uses different beta header + anthropic_model_info = AnthropicModelInfo() + if anthropic_model_info.is_tool_search_used(tools): + beta_values.add(get_tool_search_beta_header("vertex_ai")) + + if beta_values: + headers["anthropic-beta"] = ",".join(beta_values) return headers, api_base diff --git a/litellm/proxy/hooks/parallel_request_limiter_v3.py b/litellm/proxy/hooks/parallel_request_limiter_v3.py index 4d17cca22ad..b5bbb4237c1 100644 --- a/litellm/proxy/hooks/parallel_request_limiter_v3.py +++ b/litellm/proxy/hooks/parallel_request_limiter_v3.py @@ -1236,7 +1236,7 @@ def _create_pipeline_operations( return pipeline_operations def _get_total_tokens_from_usage( - self, usage: Any | None, rate_limit_type: Literal["output", "input", "total"] + self, usage: Optional[Any], rate_limit_type: Literal["output", "input", "total"] ) -> int: """ Get total tokens from response usage for rate limiting. diff --git a/litellm/types/llms/anthropic.py b/litellm/types/llms/anthropic.py index 7d901a0fa65..779a6950d92 100644 --- a/litellm/types/llms/anthropic.py +++ b/litellm/types/llms/anthropic.py @@ -636,8 +636,10 @@ class ANTHROPIC_BETA_HEADER_VALUES(str, Enum): ADVANCED_TOOL_USE_2025_11_20 = "advanced-tool-use-2025-11-20" -# Tool search beta header constant +# Tool search beta header constant (for Anthropic direct API and Microsoft Foundry) ANTHROPIC_TOOL_SEARCH_BETA_HEADER = "advanced-tool-use-2025-11-20" # Effort beta header constant ANTHROPIC_EFFORT_BETA_HEADER = "effort-2025-11-24" + + diff --git a/litellm/types/llms/anthropic_tool_search.py b/litellm/types/llms/anthropic_tool_search.py new file mode 100644 index 00000000000..d8656ce8bb3 --- /dev/null +++ b/litellm/types/llms/anthropic_tool_search.py @@ -0,0 +1,36 @@ +""" +Tool Search Beta Header Configuration + +Reference: https://platform.claude.com/docs/en/agents-and-tools/tool-use/tool-search-tool +""" + +from typing import Dict + +from litellm.types.utils import LlmProviders + +# Tool search beta header values +TOOL_SEARCH_BETA_HEADER_ANTHROPIC = "advanced-tool-use-2025-11-20" +TOOL_SEARCH_BETA_HEADER_VERTEX = "tool-search-tool-2025-10-19" +TOOL_SEARCH_BETA_HEADER_BEDROCK = "tool-search-tool-2025-10-19" + + +# Mapping of custom_llm_provider -> tool search beta header +TOOL_SEARCH_BETA_HEADER_BY_PROVIDER: Dict[str, str] = { + LlmProviders.ANTHROPIC.value: TOOL_SEARCH_BETA_HEADER_ANTHROPIC, + LlmProviders.AZURE.value: TOOL_SEARCH_BETA_HEADER_ANTHROPIC, + LlmProviders.AZURE_AI.value: TOOL_SEARCH_BETA_HEADER_ANTHROPIC, + LlmProviders.VERTEX_AI.value: TOOL_SEARCH_BETA_HEADER_VERTEX, + LlmProviders.VERTEX_AI_BETA.value: TOOL_SEARCH_BETA_HEADER_VERTEX, + LlmProviders.BEDROCK.value: TOOL_SEARCH_BETA_HEADER_BEDROCK, +} + + +def get_tool_search_beta_header(custom_llm_provider: str) -> str: + """ + Get the tool search beta header for a given provider. + """ + return TOOL_SEARCH_BETA_HEADER_BY_PROVIDER.get( + custom_llm_provider, + TOOL_SEARCH_BETA_HEADER_ANTHROPIC + ) + diff --git a/tests/pass_through_unit_tests/base_anthropic_messages_tool_search_test.py b/tests/pass_through_unit_tests/base_anthropic_messages_tool_search_test.py new file mode 100644 index 00000000000..590e746b39c --- /dev/null +++ b/tests/pass_through_unit_tests/base_anthropic_messages_tool_search_test.py @@ -0,0 +1,294 @@ +""" +Base test class for Anthropic Messages API tool search E2E tests. + +Tests that tool search works correctly via litellm.anthropic.messages interface +by making actual API calls and validating that tool search discovers deferred tools. + +Reference: https://platform.claude.com/docs/en/agents-and-tools/tool-use/tool-search-tool +""" + +import json +import os +import sys +from abc import ABC, abstractmethod +from typing import Any, Dict, List + +sys.path.insert(0, os.path.abspath("../../..")) + +import pytest +import litellm + + +# Sample tools for tool search testing +def get_deferred_tools() -> List[Dict[str, Any]]: + """ + Returns a list of tools with defer_loading: true. + These tools should only be discovered via tool search. + """ + return [ + { + "name": "get_weather", + "description": "Get the current weather for a location", + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + } + }, + "required": ["location"] + }, + "defer_loading": True + }, + { + "name": "get_stock_price", + "description": "Get the current stock price for a ticker symbol", + "input_schema": { + "type": "object", + "properties": { + "ticker": { + "type": "string", + "description": "The stock ticker symbol, e.g. AAPL" + } + }, + "required": ["ticker"] + }, + "defer_loading": True + }, + { + "name": "search_web", + "description": "Search the web for information", + "input_schema": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The search query" + } + }, + "required": ["query"] + }, + "defer_loading": True + }, + ] + + +def get_tool_search_tool_regex() -> Dict[str, Any]: + """Returns the tool search tool using regex variant.""" + return { + "type": "tool_search_tool_regex_20251119", + "name": "tool_search_tool_regex" + } + + +def get_tool_search_tool_bm25() -> Dict[str, Any]: + """Returns the tool search tool using BM25 variant.""" + return { + "type": "tool_search_tool_bm25_20251119", + "name": "tool_search_tool_bm25" + } + + +class BaseAnthropicMessagesToolSearchTest(ABC): + """ + Base test class for tool search E2E tests across different providers. + + Subclasses must implement: + - get_model(): Returns the model string to use for tests + + Tests pass the anthropic-beta header via extra_headers to validate + that the header is correctly forwarded to downstream providers. + """ + + + @abstractmethod + def get_model(self) -> str: + """ + Returns the model string to use for tests. + + Examples: + - "anthropic/claude-sonnet-4-20250514" + - "vertex_ai/claude-sonnet-4@20250514" + - "bedrock/invoke/anthropic.claude-sonnet-4-20250514-v1:0" + """ + pass + + def get_extra_headers(self) -> Dict[str, str]: + """ + Returns extra headers to pass with the request. + Includes the anthropic-beta header for tool search. + + This is what claude code forwards, simulate the same behavior here. + """ + return {"anthropic-beta": "advanced-tool-use-2025-11-20"} + + def get_tools_with_tool_search(self) -> List[Dict[str, Any]]: + """ + Returns tools list with tool search tool and deferred tools. + """ + return [get_tool_search_tool_regex()] + get_deferred_tools() + + @pytest.mark.asyncio + async def test_tool_search_basic_request(self): + """ + E2E test: Basic tool search request should succeed. + + This validates that the tool search beta header is being passed via + extra_headers and forwarded correctly to the downstream provider. + """ + litellm._turn_on_debug() + + tools = self.get_tools_with_tool_search() + messages = [ + { + "role": "user", + "content": "What's the weather in San Francisco?" + } + ] + + response = await litellm.anthropic.messages.acreate( + model=self.get_model(), + messages=messages, + tools=tools, + max_tokens=1024, + extra_headers=self.get_extra_headers(), + ) + + print(f"Response: {json.dumps(response, indent=2, default=str)}") + + # Validate response structure + assert "content" in response, "Response should contain content" + assert "usage" in response, "Response should contain usage" + + # The model should either respond with text or use a tool + content = response.get("content", []) + assert len(content) > 0, "Response should have content" + + @pytest.mark.asyncio + async def test_tool_search_discovers_tool(self): + """ + E2E test: Tool search should discover and use a deferred tool. + + This validates that when the user asks about weather, the model + discovers the get_weather tool via tool search and attempts to use it. + """ + litellm._turn_on_debug() + + tools = self.get_tools_with_tool_search() + messages = [ + { + "role": "user", + "content": "I need to know the current weather in New York City. Please use the appropriate tool." + } + ] + + response = await litellm.anthropic.messages.acreate( + model=self.get_model(), + messages=messages, + tools=tools, + max_tokens=1024, + extra_headers=self.get_extra_headers(), + ) + + print(f"Response: {json.dumps(response, indent=2, default=str)}") + + content = response.get("content", []) + + # Check if the model used tool_use (either tool_search or get_weather) + tool_uses = [block for block in content if block.get("type") == "tool_use"] + + print(f"Tool uses: {json.dumps(tool_uses, indent=2, default=str)}") + + # The model should attempt to use tools when asked about weather + # It might use tool_search first, or directly use get_weather if discovered + if response.get("stop_reason") == "tool_use": + assert len(tool_uses) > 0, "Expected tool_use blocks when stop_reason is tool_use" + + @pytest.mark.asyncio + async def test_tool_search_streaming(self): + """ + E2E test: Tool search should work with streaming responses. + """ + litellm._turn_on_debug() + + tools = self.get_tools_with_tool_search() + messages = [ + { + "role": "user", + "content": "What's the weather like in Tokyo?" + } + ] + + response = await litellm.anthropic.messages.acreate( + model=self.get_model(), + messages=messages, + tools=tools, + max_tokens=1024, + stream=True, + extra_headers=self.get_extra_headers(), + ) + + # Collect all chunks + chunks = [] + async for chunk in response: + if isinstance(chunk, bytes): + chunk_str = chunk.decode("utf-8") + for line in chunk_str.split("\n"): + if line.startswith("data: "): + try: + json_data = json.loads(line[6:]) + chunks.append(json_data) + print(f"Chunk: {json.dumps(json_data, indent=2, default=str)}") + except json.JSONDecodeError: + pass + elif isinstance(chunk, dict): + chunks.append(chunk) + print(f"Chunk: {json.dumps(chunk, indent=2, default=str)}") + + # Should have received chunks + assert len(chunks) > 0, "Expected to receive streaming chunks" + + # Should have message_start + message_starts = [c for c in chunks if c.get("type") == "message_start"] + assert len(message_starts) > 0, "Expected message_start in streaming response" + + @pytest.mark.asyncio + async def test_tool_search_with_multiple_deferred_tools(self): + """ + E2E test: Tool search should work with multiple deferred tools. + + This validates that the model can discover the appropriate tool + from a larger catalog of deferred tools. + """ + litellm._turn_on_debug() + + tools = self.get_tools_with_tool_search() + messages = [ + { + "role": "user", + "content": "What's the stock price of Apple (AAPL)?" + } + ] + + response = await litellm.anthropic.messages.acreate( + model=self.get_model(), + messages=messages, + tools=tools, + max_tokens=1024, + extra_headers=self.get_extra_headers(), + ) + + print(f"Response: {json.dumps(response, indent=2, default=str)}") + + # Validate response + assert "content" in response, "Response should contain content" + + content = response.get("content", []) + tool_uses = [block for block in content if block.get("type") == "tool_use"] + + # If the model decides to use a tool, it should be related to stocks + if tool_uses: + tool_names = [t.get("name") for t in tool_uses] + print(f"Tools used: {tool_names}") + diff --git a/tests/pass_through_unit_tests/test_anthropic_messages_tool_search.py b/tests/pass_through_unit_tests/test_anthropic_messages_tool_search.py new file mode 100644 index 00000000000..4914a3df77a --- /dev/null +++ b/tests/pass_through_unit_tests/test_anthropic_messages_tool_search.py @@ -0,0 +1,83 @@ +""" +E2E Test suite for Anthropic Messages API tool search across different providers. + +Tests that tool search works correctly via litellm.anthropic.messages interface +by making actual API calls. + +Supported providers: +- Anthropic API: advanced-tool-use-2025-11-20 +- Azure Anthropic: advanced-tool-use-2025-11-20 +- Vertex AI: tool-search-tool-2025-10-19 +- Bedrock Invoke: tool-search-tool-2025-10-19 + +Reference: https://platform.claude.com/docs/en/agents-and-tools/tool-use/tool-search-tool +""" + +import os +import sys + +sys.path.insert(0, os.path.abspath("../../..")) + +import pytest +from base_anthropic_messages_tool_search_test import ( + BaseAnthropicMessagesToolSearchTest, +) + + +class TestAnthropicAPIToolSearch(BaseAnthropicMessagesToolSearchTest): + """ + E2E tests for tool search with Anthropic API directly. + + Uses the anthropic/ prefix which routes through the native + Anthropic Messages API. + + Beta header: advanced-tool-use-2025-11-20 + + Note: Tool search is only supported on Claude Opus 4.5 and Claude Sonnet 4.5. + """ + + def get_model(self) -> str: + return "anthropic/claude-sonnet-4-5-20250929" + + +# class TestAzureAnthropicToolSearch(BaseAnthropicMessagesToolSearchTest): +# """ +# E2E tests for tool search with Azure Anthropic (Microsoft Foundry). + +# Uses the azure/ prefix which routes through Azure's Anthropic endpoint. + +# Beta header: advanced-tool-use-2025-11-20 +# """ + +# def get_model(self) -> str: +# return "azure/claude-sonnet-4-20250514" + + +# class TestVertexAIToolSearch(BaseAnthropicMessagesToolSearchTest): +# """ +# E2E tests for tool search with Vertex AI. + +# Uses the vertex_ai/ prefix which routes through Google Cloud's +# Vertex AI Anthropic partner models. + +# Beta header: tool-search-tool-2025-10-19 +# """ + +# def get_model(self) -> str: +# return "vertex_ai/claude-sonnet-4@20250514" + + +class TestBedrockInvokeToolSearch(BaseAnthropicMessagesToolSearchTest): + """ + E2E tests for tool search with Bedrock Invoke API. + + Uses the bedrock/invoke/ prefix which routes through the native + Anthropic Messages API format on Bedrock. + + Beta header: advanced-tool-use-2025-11-20 (passed via extra_headers) + + Note: Tool search on Bedrock is only supported on Claude Opus 4.5. + """ + + def get_model(self) -> str: + return "bedrock/invoke/us.anthropic.claude-opus-4-5-20251101-v1:0"