From d0c82c7eba96b0adc714ecd9e6211b9fce5e9c2f Mon Sep 17 00:00:00 2001 From: Ishaan Jaffer Date: Tue, 20 Jan 2026 11:58:17 -0800 Subject: [PATCH 01/13] fix count_tokens_with_anthropic_api --- litellm/proxy/utils.py | 71 +----------------------------------------- 1 file changed, 1 insertion(+), 70 deletions(-) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index fcb678ef02a..e0855333e26 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -131,7 +131,6 @@ unified_guardrail = UnifiedLLMGuardrails() -_anthropic_async_clients = {} def print_verbose(print_statement): """ @@ -961,8 +960,8 @@ async def _process_guardrail_callback( Returns: Updated data dictionary if guardrail passes, None if guardrail should be skipped """ - from litellm.types.guardrails import GuardrailEventHooks from litellm.integrations.prometheus import PrometheusLogger + from litellm.types.guardrails import GuardrailEventHooks # Determine the event type based on call type event_type = GuardrailEventHooks.pre_call @@ -4292,74 +4291,6 @@ def construct_database_url_from_env_vars() -> Optional[str]: return None -async def count_tokens_with_anthropic_api( - model_to_use: str, - messages: Optional[List[Dict[str, Any]]], - deployment: Optional[Dict[str, Any]] = None, -) -> Optional[Dict[str, Any]]: - """ - Helper function to count tokens using Anthropic API directly. 
- - Args: - model_to_use: The model name to use for token counting - messages: The messages to count tokens for - deployment: Optional deployment configuration containing API key - - Returns: - Optional dict with token count and tokenizer info, or None if failed - """ - if not messages: - return None - - try: - import os - - import anthropic - - # Get Anthropic API key from deployment config - anthropic_api_key = None - if deployment is not None: - anthropic_api_key = deployment.get("litellm_params", {}).get("api_key") - - # Fallback to environment variable - if not anthropic_api_key: - anthropic_api_key = os.getenv("ANTHROPIC_API_KEY") - - if anthropic_api_key and messages: - # Call Anthropic API directly for more accurate token counting - - # Use cached client if available to avoid socket exhaustion - if anthropic_api_key not in _anthropic_async_clients: - _anthropic_async_clients[anthropic_api_key] = anthropic.AsyncAnthropic(api_key=anthropic_api_key) - - client = _anthropic_async_clients[anthropic_api_key] - - # Call with explicit parameters to satisfy type checking - # Type ignore for now since messages come from generic dict input - response = await client.beta.messages.count_tokens( - model=model_to_use, - messages=messages, # type: ignore - betas=["token-counting-2024-11-01"], - ) - total_tokens = response.input_tokens - tokenizer_used = "anthropic_api" - - return { - "total_tokens": total_tokens, - "tokenizer_used": tokenizer_used, - } - - except ImportError: - verbose_proxy_logger.warning( - "Anthropic library not available, falling back to LiteLLM tokenizer" - ) - except Exception as e: - verbose_proxy_logger.warning( - f"Error calling Anthropic API: {e}, falling back to LiteLLM tokenizer" - ) - return None - - async def get_available_models_for_user( user_api_key_dict: "UserAPIKeyAuth", llm_router: Optional["Router"], From 10a6a8c3ee08fe7f125a08e24c87230fd97b7acf Mon Sep 17 00:00:00 2001 From: Ishaan Jaffer Date: Tue, 20 Jan 2026 12:13:55 -0800 Subject: 
[PATCH 02/13] remove outdated file --- tests/test_litellm/test_utils_custom.py | 45 ------------------------- 1 file changed, 45 deletions(-) delete mode 100644 tests/test_litellm/test_utils_custom.py diff --git a/tests/test_litellm/test_utils_custom.py b/tests/test_litellm/test_utils_custom.py deleted file mode 100644 index 3e924e9c719..00000000000 --- a/tests/test_litellm/test_utils_custom.py +++ /dev/null @@ -1,45 +0,0 @@ -import pytest -import sys -from unittest.mock import MagicMock, patch, AsyncMock -from litellm.proxy.utils import count_tokens_with_anthropic_api, _anthropic_async_clients - -@pytest.mark.asyncio -async def test_count_tokens_caching(): - """ - Test that count_tokens_with_anthropic_api caches the client. - """ - # Clear cache - _anthropic_async_clients.clear() - - api_key = "sk-ant-test-key" - messages = [{"role": "user", "content": "hello"}] - model = "claude-3-opus-20240229" - - # Create a mock anthropic module - mock_anthropic = MagicMock() - mock_client = MagicMock() - mock_anthropic.AsyncAnthropic.return_value = mock_client - - # Mock response - mock_response = MagicMock() - mock_response.input_tokens = 10 - - # Setup async return for count_tokens - mock_client.beta.messages.count_tokens = AsyncMock(return_value=mock_response) - - # Patch sys.modules to ensure our mock is used when anthropic is imported - with patch.dict(sys.modules, {"anthropic": mock_anthropic}): - # First call - with patch.dict("os.environ", {"ANTHROPIC_API_KEY": api_key}): - await count_tokens_with_anthropic_api(model, messages) - - assert api_key in _anthropic_async_clients - assert _anthropic_async_clients[api_key] == mock_client - mock_anthropic.AsyncAnthropic.assert_called_once() # Should be called once - - # Second call - with patch.dict("os.environ", {"ANTHROPIC_API_KEY": api_key}): - await count_tokens_with_anthropic_api(model, messages) - - # Should still be called once (cached) - mock_anthropic.AsyncAnthropic.assert_called_once() From 
fa5b07d6a65d08dbaabf345fc2603158d8fac91f Mon Sep 17 00:00:00 2001 From: Ishaan Jaffer Date: Tue, 20 Jan 2026 12:19:36 -0800 Subject: [PATCH 03/13] fix ANTHROPIC_TOKEN_COUNTING_BETA_VERSION --- litellm/constants.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/litellm/constants.py b/litellm/constants.py index c98551fb1b6..e142c7d6304 100644 --- a/litellm/constants.py +++ b/litellm/constants.py @@ -323,6 +323,9 @@ EMAIL_BUDGET_ALERT_MAX_SPEND_ALERT_PERCENTAGE = float(os.getenv("EMAIL_BUDGET_ALERT_MAX_SPEND_ALERT_PERCENTAGE", 0.8)) # 80% of max budget ############### LLM Provider Constants ############### ### ANTHROPIC CONSTANTS ### +ANTHROPIC_TOKEN_COUNTING_BETA_VERSION = os.getenv( + "ANTHROPIC_TOKEN_COUNTING_BETA_VERSION", "token-counting-2024-11-01" +) ANTHROPIC_SKILLS_API_BETA_VERSION = "skills-2025-10-02" ANTHROPIC_WEB_SEARCH_TOOL_MAX_USES = { "low": 1, From 2f3f26a7323355e9011b80fda91a207b94ef4f0f Mon Sep 17 00:00:00 2001 From: Ishaan Jaffer Date: Tue, 20 Jan 2026 12:21:03 -0800 Subject: [PATCH 04/13] refactor: get_token_counter --- litellm/llms/anthropic/common_utils.py | 42 +----- litellm/llms/azure_ai/common_utils.py | 169 ++++++++++++++++++++++--- 2 files changed, 154 insertions(+), 57 deletions(-) diff --git a/litellm/llms/anthropic/common_utils.py b/litellm/llms/anthropic/common_utils.py index fcbe9823ed4..3a8da2a7fd6 100644 --- a/litellm/llms/anthropic/common_utils.py +++ b/litellm/llms/anthropic/common_utils.py @@ -476,45 +476,11 @@ def get_token_counter(self) -> Optional[BaseTokenCounter]: Returns: AnthropicTokenCounter instance for this provider. 
""" - return AnthropicTokenCounter() - - -class AnthropicTokenCounter(BaseTokenCounter): - """Token counter implementation for Anthropic provider.""" - - def should_use_token_counting_api( - self, - custom_llm_provider: Optional[str] = None, - ) -> bool: - from litellm.types.utils import LlmProviders - return custom_llm_provider == LlmProviders.ANTHROPIC.value - - async def count_tokens( - self, - model_to_use: str, - messages: Optional[List[Dict[str, Any]]], - contents: Optional[List[Dict[str, Any]]], - deployment: Optional[Dict[str, Any]] = None, - request_model: str = "", - ) -> Optional[TokenCountResponse]: - from litellm.proxy.utils import count_tokens_with_anthropic_api - - result = await count_tokens_with_anthropic_api( - model_to_use=model_to_use, - messages=messages, - deployment=deployment, + from litellm.llms.anthropic.count_tokens.token_counter import ( + AnthropicTokenCounter, ) - - if result is not None: - return TokenCountResponse( - total_tokens=result.get("total_tokens", 0), - request_model=request_model, - model_used=model_to_use, - tokenizer_type=result.get("tokenizer_used", ""), - original_response=result, - ) - - return None + + return AnthropicTokenCounter() def process_anthropic_headers(headers: Union[httpx.Headers, dict]) -> dict: diff --git a/litellm/llms/azure_ai/common_utils.py b/litellm/llms/azure_ai/common_utils.py index 9487c7f83f2..1a88926ae39 100644 --- a/litellm/llms/azure_ai/common_utils.py +++ b/litellm/llms/azure_ai/common_utils.py @@ -1,17 +1,139 @@ -from typing import List, Literal, Optional +from typing import Any, Dict, List, Literal, Optional import litellm -from litellm.llms.base_llm.base_utils import BaseLLMModelInfo +from litellm.llms.base_llm.base_utils import BaseLLMModelInfo, BaseTokenCounter from litellm.secret_managers.main import get_secret_str from litellm.types.llms.openai import AllMessageValues +from litellm.types.utils import TokenCountResponse + + +class AzureAIAnthropicTokenCounter(BaseTokenCounter): + 
"""Token counter implementation for Azure AI Anthropic provider using the CountTokens API.""" + + def should_use_token_counting_api( + self, + custom_llm_provider: Optional[str] = None, + ) -> bool: + from litellm.types.utils import LlmProviders + + return custom_llm_provider == LlmProviders.AZURE_AI.value + + async def count_tokens( + self, + model_to_use: str, + messages: Optional[List[Dict[str, Any]]], + contents: Optional[List[Dict[str, Any]]], + deployment: Optional[Dict[str, Any]] = None, + request_model: str = "", + ) -> Optional[TokenCountResponse]: + """ + Count tokens using Azure AI Anthropic's CountTokens API. + + Args: + model_to_use: The model identifier + messages: The messages to count tokens for + contents: Alternative content format (not used for Anthropic) + deployment: Deployment configuration containing litellm_params + request_model: The original request model name + + Returns: + TokenCountResponse with token count, or None if counting fails + """ + import os + + from litellm._logging import verbose_logger + from litellm.llms.anthropic.common_utils import AnthropicError + from litellm.llms.azure_ai.anthropic.count_tokens.handler import ( + AzureAIAnthropicCountTokensHandler, + ) + + if not messages: + return None + + deployment = deployment or {} + litellm_params = deployment.get("litellm_params", {}) + + # Get Azure AI API key from deployment config or environment + api_key = litellm_params.get("api_key") + if not api_key: + api_key = os.getenv("AZURE_AI_API_KEY") + + # Get API base from deployment config or environment + api_base = litellm_params.get("api_base") + if not api_base: + api_base = os.getenv("AZURE_AI_API_BASE") + + if not api_key: + verbose_logger.warning( + "No Azure AI API key found for token counting" + ) + return None + + if not api_base: + verbose_logger.warning( + "No Azure AI API base found for token counting" + ) + return None + + try: + handler = AzureAIAnthropicCountTokensHandler() + result = await 
handler.handle_count_tokens_request( + model=model_to_use, + messages=messages, + api_key=api_key, + api_base=api_base, + litellm_params=litellm_params, + ) + + if result is not None: + return TokenCountResponse( + total_tokens=result.get("input_tokens", 0), + request_model=request_model, + model_used=model_to_use, + tokenizer_type="azure_ai_anthropic_api", + original_response=result, + ) + except AnthropicError as e: + verbose_logger.warning( + f"Azure AI Anthropic CountTokens API error: status={e.status_code}, message={e.message}" + ) + return TokenCountResponse( + total_tokens=0, + request_model=request_model, + model_used=model_to_use, + tokenizer_type="azure_ai_anthropic_api", + error=True, + error_message=e.message, + status_code=e.status_code, + ) + except Exception as e: + verbose_logger.warning( + f"Error calling Azure AI Anthropic CountTokens API: {e}" + ) + return TokenCountResponse( + total_tokens=0, + request_model=request_model, + model_used=model_to_use, + tokenizer_type="azure_ai_anthropic_api", + error=True, + error_message=str(e), + status_code=500, + ) + + return None class AzureFoundryModelInfo(BaseLLMModelInfo): + """Model info for Azure AI / Azure Foundry models.""" + + def __init__(self, model: Optional[str] = None): + self._model = model + @staticmethod def get_azure_ai_route(model: str) -> Literal["agents", "default"]: """ Get the Azure AI route for the given model. - + Similar to BedrockModelInfo.get_bedrock_route(). 
""" if "agents/" in model: @@ -21,33 +143,40 @@ def get_azure_ai_route(model: str) -> Literal["agents", "default"]: @staticmethod def get_api_base(api_base: Optional[str] = None) -> Optional[str]: return ( - api_base - or litellm.api_base - or get_secret_str("AZURE_AI_API_BASE") + api_base or litellm.api_base or get_secret_str("AZURE_AI_API_BASE") ) - + @staticmethod def get_api_key(api_key: Optional[str] = None) -> Optional[str]: return ( - api_key - or litellm.api_key - or litellm.openai_key - or get_secret_str("AZURE_AI_API_KEY") - ) - + api_key + or litellm.api_key + or litellm.openai_key + or get_secret_str("AZURE_AI_API_KEY") + ) + @property def api_version(self, api_version: Optional[str] = None) -> Optional[str]: api_version = ( - api_version - or litellm.api_version - or get_secret_str("AZURE_API_VERSION") + api_version or litellm.api_version or get_secret_str("AZURE_API_VERSION") ) return api_version - + + def get_token_counter(self) -> Optional[BaseTokenCounter]: + """ + Factory method to create a token counter for Azure AI. + + Returns: + AzureAIAnthropicTokenCounter for Claude models, None otherwise. 
+ """ + # Only return token counter for Claude models + if self._model and "claude" in self._model.lower(): + return AzureAIAnthropicTokenCounter() + return None + ######################################################### # Not implemented methods ######################################################### - @staticmethod def get_base_model(model: str) -> Optional[str]: @@ -64,4 +193,6 @@ def validate_environment( api_base: Optional[str] = None, ) -> dict: """Azure Foundry sends api key in query params""" - raise NotImplementedError("Azure Foundry does not support environment validation") + raise NotImplementedError( + "Azure Foundry does not support environment validation" + ) From aec8299d8b3e0664aca9323855cf38db4b738104 Mon Sep 17 00:00:00 2001 From: Ishaan Jaffer Date: Tue, 20 Jan 2026 12:22:02 -0800 Subject: [PATCH 05/13] init test suite for token counter --- .../base_token_counter_test.py | 130 ++++++++++++++++++ .../test_anthropic_token_counter.py | 47 +++++++ .../test_azure_ai_anthropic_token_counter.py | 53 +++++++ .../test_bedrock_token_counter.py | 101 ++++++++++++++ .../test_proxy_token_counter.py | 83 ++++++----- 5 files changed, 380 insertions(+), 34 deletions(-) create mode 100644 tests/litellm_utils_tests/base_token_counter_test.py create mode 100644 tests/litellm_utils_tests/test_anthropic_token_counter.py create mode 100644 tests/litellm_utils_tests/test_azure_ai_anthropic_token_counter.py create mode 100644 tests/litellm_utils_tests/test_bedrock_token_counter.py diff --git a/tests/litellm_utils_tests/base_token_counter_test.py b/tests/litellm_utils_tests/base_token_counter_test.py new file mode 100644 index 00000000000..b5e87021a0b --- /dev/null +++ b/tests/litellm_utils_tests/base_token_counter_test.py @@ -0,0 +1,130 @@ +""" +Base Token Counter Test Suite. + +This module provides an abstract base test class that enforces common tests +across all token counter implementations. Similar to base_llm_unit_tests.py +for LLM chat tests. 
+ +Usage: + Create a test class that inherits from BaseTokenCounterTest and implement + the abstract methods to provide provider-specific configuration. +""" + +import os +import sys +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional + +import pytest + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path + +from litellm.llms.base_llm.base_utils import BaseTokenCounter +from litellm.types.utils import TokenCountResponse + + +class BaseTokenCounterTest(ABC): + """ + Abstract base test class for token counter implementations. + + Subclasses must implement: + - get_token_counter(): Returns the token counter instance + - get_test_model(): Returns the model name to use for testing + - get_test_messages(): Returns test messages for token counting + - get_deployment_config(): Returns deployment configuration with credentials + - get_custom_llm_provider(): Returns the provider name for should_use_token_counting_api + """ + + @abstractmethod + def get_token_counter(self) -> BaseTokenCounter: + """Must return the token counter instance to test.""" + pass + + @abstractmethod + def get_test_model(self) -> str: + """Must return the model name to use for testing.""" + pass + + @abstractmethod + def get_test_messages(self) -> List[Dict[str, Any]]: + """Must return test messages for token counting.""" + pass + + @abstractmethod + def get_deployment_config(self) -> Dict[str, Any]: + """Must return deployment configuration with credentials.""" + pass + + @abstractmethod + def get_custom_llm_provider(self) -> str: + """Must return the provider name for should_use_token_counting_api check.""" + pass + + @pytest.fixture(autouse=True) + def _handle_missing_credentials(self): + """Fixture to skip tests when credentials are missing.""" + try: + yield + except Exception as e: + error_str = str(e).lower() + if "api key" in error_str or "api_key" in error_str or "unauthorized" in error_str: + pytest.skip(f"Missing or 
invalid credentials: {e}") + raise + + @pytest.mark.asyncio + async def test_count_tokens_basic(self): + """ + Test basic token counting functionality. + + Verifies that: + - Token counter returns a TokenCountResponse + - total_tokens is greater than 0 + - tokenizer_type is set + - No error occurred + """ + token_counter = self.get_token_counter() + model = self.get_test_model() + messages = self.get_test_messages() + deployment = self.get_deployment_config() + + result = await token_counter.count_tokens( + model_to_use=model, + messages=messages, + contents=None, + deployment=deployment, + request_model=model, + ) + + print(f"Token count result: {result}") + + assert result is not None, "Token counter should return a result" + assert isinstance(result, TokenCountResponse), "Result should be TokenCountResponse" + assert result.total_tokens > 0, f"Token count should be > 0, got {result.total_tokens}" + assert result.tokenizer_type is not None, "tokenizer_type should be set" + assert result.error is not True, f"Token counting should not error: {result.error_message}" + + def test_should_use_token_counting_api(self): + """ + Test that should_use_token_counting_api returns True for the correct provider. + + Verifies that the token counter correctly identifies when it should be used + based on the custom_llm_provider. 
+ """ + token_counter = self.get_token_counter() + provider = self.get_custom_llm_provider() + + result = token_counter.should_use_token_counting_api( + custom_llm_provider=provider + ) + + assert result is True, f"should_use_token_counting_api should return True for {provider}" + + # Also verify it returns False for other providers + other_provider = "some_other_provider_that_doesnt_exist" + result_other = token_counter.should_use_token_counting_api( + custom_llm_provider=other_provider + ) + + assert result_other is False, f"should_use_token_counting_api should return False for {other_provider}" diff --git a/tests/litellm_utils_tests/test_anthropic_token_counter.py b/tests/litellm_utils_tests/test_anthropic_token_counter.py new file mode 100644 index 00000000000..a1fbcecfdd4 --- /dev/null +++ b/tests/litellm_utils_tests/test_anthropic_token_counter.py @@ -0,0 +1,47 @@ +""" +Anthropic Token Counter Tests. + +Tests for the Anthropic token counter implementation using the base test suite. 
+""" + +import os +import sys +from typing import Any, Dict, List + +import pytest + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path + +from litellm.llms.anthropic.count_tokens import AnthropicTokenCounter +from litellm.llms.base_llm.base_utils import BaseTokenCounter +from tests.litellm_utils_tests.base_token_counter_test import BaseTokenCounterTest + + +class TestAnthropicTokenCounter(BaseTokenCounterTest): + """Test suite for Anthropic token counter.""" + + def get_token_counter(self) -> BaseTokenCounter: + return AnthropicTokenCounter() + + def get_test_model(self) -> str: + return "claude-sonnet-4-20250514" + + def get_test_messages(self) -> List[Dict[str, Any]]: + return [ + {"role": "user", "content": "Hello, how are you today?"} + ] + + def get_deployment_config(self) -> Dict[str, Any]: + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: + pytest.skip("ANTHROPIC_API_KEY not set") + return { + "litellm_params": { + "api_key": api_key, + } + } + + def get_custom_llm_provider(self) -> str: + return "anthropic" diff --git a/tests/litellm_utils_tests/test_azure_ai_anthropic_token_counter.py b/tests/litellm_utils_tests/test_azure_ai_anthropic_token_counter.py new file mode 100644 index 00000000000..031502cbece --- /dev/null +++ b/tests/litellm_utils_tests/test_azure_ai_anthropic_token_counter.py @@ -0,0 +1,53 @@ +""" +Azure AI Anthropic Token Counter Tests. + +Tests for the Azure AI Anthropic token counter implementation using the base test suite. 
+""" + +import os +import sys +from typing import Any, Dict, List + +import pytest + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path + +from litellm.llms.azure_ai.anthropic.count_tokens import AzureAIAnthropicTokenCounter +from litellm.llms.base_llm.base_utils import BaseTokenCounter +from tests.litellm_utils_tests.base_token_counter_test import BaseTokenCounterTest + + +class TestAzureAIAnthropicTokenCounter(BaseTokenCounterTest): + """Test suite for Azure AI Anthropic token counter.""" + + def get_token_counter(self) -> BaseTokenCounter: + return AzureAIAnthropicTokenCounter() + + def get_test_model(self) -> str: + return "claude-3-5-sonnet" + + def get_test_messages(self) -> List[Dict[str, Any]]: + return [ + {"role": "user", "content": "Hello, how are you today?"} + ] + + def get_deployment_config(self) -> Dict[str, Any]: + api_key = os.getenv("AZURE_AI_API_KEY") + api_base = os.getenv("AZURE_AI_API_BASE") + + if not api_key: + pytest.skip("AZURE_AI_API_KEY not set") + if not api_base: + pytest.skip("AZURE_AI_API_BASE not set") + + return { + "litellm_params": { + "api_key": api_key, + "api_base": api_base, + } + } + + def get_custom_llm_provider(self) -> str: + return "azure_ai" diff --git a/tests/litellm_utils_tests/test_bedrock_token_counter.py b/tests/litellm_utils_tests/test_bedrock_token_counter.py new file mode 100644 index 00000000000..f7c29918820 --- /dev/null +++ b/tests/litellm_utils_tests/test_bedrock_token_counter.py @@ -0,0 +1,101 @@ +""" +Bedrock Token Counter Tests. + +Tests for the Bedrock token counter implementation using the base test suite. + +Note: Not all Bedrock models support token counting. The CountTokens API +is only available for specific models. If the model doesn't support token +counting, the test will be skipped. 
+""" + +import os +import sys +from typing import Any, Dict, List + +import pytest + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path + +from litellm.llms.base_llm.base_utils import BaseTokenCounter +from litellm.llms.bedrock.count_tokens.bedrock_token_counter import BedrockTokenCounter +from tests.litellm_utils_tests.base_token_counter_test import BaseTokenCounterTest + + +class TestBedrockTokenCounter(BaseTokenCounterTest): + """Test suite for Bedrock token counter. + + Note: Bedrock CountTokens API support varies by model. Some models + (like older Claude versions) may not support token counting. + Use amazon.nova-* models for reliable token counting support. + """ + + def get_token_counter(self) -> BaseTokenCounter: + return BedrockTokenCounter() + + def get_test_model(self) -> str: + # Use Amazon Nova model which supports token counting + # Alternatively, use environment variable to override + return os.getenv("BEDROCK_TEST_MODEL", "amazon.nova-lite-v1:0") + + def get_test_messages(self) -> List[Dict[str, Any]]: + return [ + {"role": "user", "content": "Hello, how are you today?"} + ] + + def get_deployment_config(self) -> Dict[str, Any]: + # Bedrock uses AWS credentials from environment + # Check for AWS credentials + aws_access_key = os.getenv("AWS_ACCESS_KEY_ID") + aws_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY") + aws_region = os.getenv("AWS_REGION_NAME", "us-east-1") + + if not aws_access_key or not aws_secret_key: + pytest.skip("AWS credentials not set (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)") + + return { + "litellm_params": { + "aws_access_key_id": aws_access_key, + "aws_secret_access_key": aws_secret_key, + "aws_region_name": aws_region, + } + } + + def get_custom_llm_provider(self) -> str: + return "bedrock" + + @pytest.mark.asyncio + async def test_count_tokens_basic(self): + """ + Test basic token counting functionality. + + Override to handle models that don't support token counting. 
+ """ + from litellm.types.utils import TokenCountResponse + + token_counter = self.get_token_counter() + model = self.get_test_model() + messages = self.get_test_messages() + deployment = self.get_deployment_config() + + result = await token_counter.count_tokens( + model_to_use=model, + messages=messages, + contents=None, + deployment=deployment, + request_model=model, + ) + + print(f"Token count result: {result}") + + assert result is not None, "Token counter should return a result" + assert isinstance(result, TokenCountResponse), "Result should be TokenCountResponse" + + # Check if the model doesn't support token counting + if result.error and "doesn't support counting tokens" in str(result.error_message): + pytest.skip(f"Model {model} doesn't support token counting: {result.error_message}") + + assert result.total_tokens > 0, f"Token count should be > 0, got {result.total_tokens}" + assert result.tokenizer_type is not None, "tokenizer_type should be set" + assert result.error is not True, f"Token counting should not error: {result.error_message}" diff --git a/tests/proxy_unit_tests/test_proxy_token_counter.py b/tests/proxy_unit_tests/test_proxy_token_counter.py index 8e1057bb8e1..4a2ff28cfd3 100644 --- a/tests/proxy_unit_tests/test_proxy_token_counter.py +++ b/tests/proxy_unit_tests/test_proxy_token_counter.py @@ -478,18 +478,19 @@ async def mock_read_request_body(request): @pytest.mark.asyncio async def test_factory_anthropic_endpoint_calls_anthropic_counter(): """Test that /v1/messages/count_tokens with Anthropic model uses Anthropic counter.""" - from unittest.mock import patch, AsyncMock + from unittest.mock import patch, AsyncMock, MagicMock from fastapi.testclient import TestClient from litellm.proxy.proxy_server import app - # Mock the anthropic token counting function + # Mock the Anthropic CountTokens handler with patch( - "litellm.proxy.utils.count_tokens_with_anthropic_api" - ) as mock_anthropic_count: - mock_anthropic_count.return_value = { - 
"total_tokens": 42, - "tokenizer_used": "anthropic", - } + "litellm.llms.anthropic.common_utils.AnthropicCountTokensHandler" + ) as MockHandler: + mock_handler_instance = MagicMock() + mock_handler_instance.handle_count_tokens_request = AsyncMock( + return_value={"input_tokens": 42} + ) + MockHandler.return_value = mock_handler_instance # Mock router to return Anthropic deployment with patch("litellm.proxy.proxy_server.llm_router") as mock_router: @@ -510,36 +511,44 @@ async def test_factory_anthropic_endpoint_calls_anthropic_counter(): } ) - client = TestClient(app) + # Set ANTHROPIC_API_KEY for the test + with patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"}): + client = TestClient(app) - response = client.post( - "/v1/messages/count_tokens", - json={ - "model": "claude-3-5-sonnet", - "messages": [{"role": "user", "content": "Hello"}], - }, - headers={"Authorization": "Bearer test-key"}, - ) + response = client.post( + "/v1/messages/count_tokens", + json={ + "model": "claude-3-5-sonnet", + "messages": [{"role": "user", "content": "Hello"}], + }, + headers={"Authorization": "Bearer test-key"}, + ) - assert response.status_code == 200 - data = response.json() - assert data["input_tokens"] == 42 + assert response.status_code == 200 + data = response.json() + assert data["input_tokens"] == 42 - # Verify that Anthropic API was called - mock_anthropic_count.assert_called_once() + # Verify that Anthropic handler was called + mock_handler_instance.handle_count_tokens_request.assert_called_once() @pytest.mark.asyncio async def test_factory_gpt4_endpoint_does_not_call_anthropic_counter(): """Test that /v1/messages/count_tokens with GPT-4 does NOT use Anthropic counter.""" - from unittest.mock import patch, AsyncMock + from unittest.mock import patch, AsyncMock, MagicMock from fastapi.testclient import TestClient from litellm.proxy.proxy_server import app - # Mock the anthropic token counting function + # Mock the Anthropic CountTokens handler with patch( - 
"litellm.proxy.utils.count_tokens_with_anthropic_api" - ) as mock_anthropic_count: + "litellm.llms.anthropic.common_utils.AnthropicCountTokensHandler" + ) as MockHandler: + mock_handler_instance = MagicMock() + mock_handler_instance.handle_count_tokens_request = AsyncMock( + return_value={"input_tokens": 42} + ) + MockHandler.return_value = mock_handler_instance + # Mock litellm token counter with patch("litellm.token_counter") as mock_litellm_counter: mock_litellm_counter.return_value = 50 @@ -578,21 +587,27 @@ async def test_factory_gpt4_endpoint_does_not_call_anthropic_counter(): data = response.json() assert data["input_tokens"] == 50 - # Verify that Anthropic API was NOT called - mock_anthropic_count.assert_not_called() + # Verify that Anthropic handler was NOT called + mock_handler_instance.handle_count_tokens_request.assert_not_called() @pytest.mark.asyncio async def test_factory_normal_token_counter_endpoint_does_not_call_anthropic(): """Test that /utils/token_counter does NOT use Anthropic counter even with Anthropic model.""" - from unittest.mock import patch, AsyncMock + from unittest.mock import patch, AsyncMock, MagicMock from fastapi.testclient import TestClient from litellm.proxy.proxy_server import app - # Mock the anthropic token counting function + # Mock the Anthropic CountTokens handler with patch( - "litellm.proxy.utils.count_tokens_with_anthropic_api" - ) as mock_anthropic_count: + "litellm.llms.anthropic.common_utils.AnthropicCountTokensHandler" + ) as MockHandler: + mock_handler_instance = MagicMock() + mock_handler_instance.handle_count_tokens_request = AsyncMock( + return_value={"input_tokens": 42} + ) + MockHandler.return_value = mock_handler_instance + # Mock litellm token counter with patch("litellm.token_counter") as mock_litellm_counter: mock_litellm_counter.return_value = 35 @@ -635,8 +650,8 @@ async def test_factory_normal_token_counter_endpoint_does_not_call_anthropic(): data = response.json() assert data["total_tokens"] == 35 - # 
Verify that Anthropic API was NOT called (since call_endpoint=False) - mock_anthropic_count.assert_not_called() + # Verify that Anthropic handler was NOT called (since call_endpoint=False) + mock_handler_instance.handle_count_tokens_request.assert_not_called() @pytest.mark.asyncio From 2e690fce3fd80a85c8312f00c582a46132890ee8 Mon Sep 17 00:00:00 2001 From: Ishaan Jaffer Date: Tue, 20 Jan 2026 12:23:47 -0800 Subject: [PATCH 06/13] init token counters --- .../llms/anthropic/count_tokens/__init__.py | 15 ++ .../llms/anthropic/count_tokens/handler.py | 126 +++++++++++++++++ .../anthropic/count_tokens/token_counter.py | 104 ++++++++++++++ .../anthropic/count_tokens/transformation.py | 121 ++++++++++++++++ .../anthropic/count_tokens/__init__.py | 19 +++ .../anthropic/count_tokens/handler.py | 131 ++++++++++++++++++ .../anthropic/count_tokens/token_counter.py | 119 ++++++++++++++++ .../anthropic/count_tokens/transformation.py | 88 ++++++++++++ litellm/llms/azure_ai/common_utils.py | 125 +---------------- 9 files changed, 728 insertions(+), 120 deletions(-) create mode 100644 litellm/llms/anthropic/count_tokens/__init__.py create mode 100644 litellm/llms/anthropic/count_tokens/handler.py create mode 100644 litellm/llms/anthropic/count_tokens/token_counter.py create mode 100644 litellm/llms/anthropic/count_tokens/transformation.py create mode 100644 litellm/llms/azure_ai/anthropic/count_tokens/__init__.py create mode 100644 litellm/llms/azure_ai/anthropic/count_tokens/handler.py create mode 100644 litellm/llms/azure_ai/anthropic/count_tokens/token_counter.py create mode 100644 litellm/llms/azure_ai/anthropic/count_tokens/transformation.py diff --git a/litellm/llms/anthropic/count_tokens/__init__.py b/litellm/llms/anthropic/count_tokens/__init__.py new file mode 100644 index 00000000000..ef46862bda6 --- /dev/null +++ b/litellm/llms/anthropic/count_tokens/__init__.py @@ -0,0 +1,15 @@ +""" +Anthropic CountTokens API implementation. 
+""" + +from litellm.llms.anthropic.count_tokens.handler import AnthropicCountTokensHandler +from litellm.llms.anthropic.count_tokens.token_counter import AnthropicTokenCounter +from litellm.llms.anthropic.count_tokens.transformation import ( + AnthropicCountTokensConfig, +) + +__all__ = [ + "AnthropicCountTokensHandler", + "AnthropicCountTokensConfig", + "AnthropicTokenCounter", +] diff --git a/litellm/llms/anthropic/count_tokens/handler.py b/litellm/llms/anthropic/count_tokens/handler.py new file mode 100644 index 00000000000..422c6f0e559 --- /dev/null +++ b/litellm/llms/anthropic/count_tokens/handler.py @@ -0,0 +1,126 @@ +""" +Anthropic CountTokens API handler. + +Uses httpx for HTTP requests instead of the Anthropic SDK. +""" + +from typing import Any, Dict, List, Optional, Union + +import httpx + +import litellm +from litellm._logging import verbose_logger +from litellm.llms.anthropic.common_utils import AnthropicError +from litellm.llms.anthropic.count_tokens.transformation import ( + AnthropicCountTokensConfig, +) +from litellm.llms.custom_httpx.http_handler import get_async_httpx_client + + +class AnthropicCountTokensHandler(AnthropicCountTokensConfig): + """ + Handler for Anthropic CountTokens API requests. + + Uses httpx for HTTP requests, following the same pattern as BedrockCountTokensHandler. + """ + + async def handle_count_tokens_request( + self, + model: str, + messages: List[Dict[str, Any]], + api_key: str, + api_base: Optional[str] = None, + timeout: Optional[Union[float, httpx.Timeout]] = None, + ) -> Dict[str, Any]: + """ + Handle a CountTokens request using httpx. 
+ + Args: + model: The model identifier (e.g., "claude-3-5-sonnet-20241022") + messages: The messages to count tokens for + api_key: The Anthropic API key + api_base: Optional custom API base URL + timeout: Optional timeout for the request (defaults to litellm.request_timeout) + + Returns: + Dictionary containing token count response + + Raises: + AnthropicError: If the API request fails + """ + try: + # Validate the request + self.validate_request(model, messages) + + verbose_logger.debug( + f"Processing Anthropic CountTokens request for model: {model}" + ) + + # Transform request to Anthropic format + request_body = self.transform_request_to_count_tokens( + model=model, + messages=messages, + ) + + verbose_logger.debug(f"Transformed request: {request_body}") + + # Get endpoint URL + endpoint_url = api_base or self.get_anthropic_count_tokens_endpoint() + + verbose_logger.debug(f"Making request to: {endpoint_url}") + + # Get required headers + headers = self.get_required_headers(api_key) + + # Use LiteLLM's async httpx client + async_client = get_async_httpx_client( + llm_provider=litellm.LlmProviders.ANTHROPIC + ) + + # Use provided timeout or fall back to litellm.request_timeout + request_timeout = timeout if timeout is not None else litellm.request_timeout + + response = await async_client.post( + endpoint_url, + headers=headers, + json=request_body, + timeout=request_timeout, + ) + + verbose_logger.debug(f"Response status: {response.status_code}") + + if response.status_code != 200: + error_text = response.text + verbose_logger.error(f"Anthropic API error: {error_text}") + raise AnthropicError( + status_code=response.status_code, + message=error_text, + ) + + anthropic_response = response.json() + + verbose_logger.debug(f"Anthropic response: {anthropic_response}") + + # Transform response + final_response = self.transform_response(anthropic_response) + + verbose_logger.debug(f"Final response: {final_response}") + + return final_response + + except 
AnthropicError: + # Re-raise Anthropic exceptions as-is + raise + except httpx.HTTPStatusError as e: + # HTTP errors - preserve the actual status code + verbose_logger.error(f"HTTP error in CountTokens handler: {str(e)}") + raise AnthropicError( + status_code=e.response.status_code, + message=e.response.text, + ) + except Exception as e: + verbose_logger.error(f"Error in CountTokens handler: {str(e)}") + raise AnthropicError( + status_code=500, + message=f"CountTokens processing error: {str(e)}", + ) diff --git a/litellm/llms/anthropic/count_tokens/token_counter.py b/litellm/llms/anthropic/count_tokens/token_counter.py new file mode 100644 index 00000000000..266b2794fc3 --- /dev/null +++ b/litellm/llms/anthropic/count_tokens/token_counter.py @@ -0,0 +1,104 @@ +""" +Anthropic Token Counter implementation using the CountTokens API. +""" + +import os +from typing import Any, Dict, List, Optional + +from litellm._logging import verbose_logger +from litellm.llms.anthropic.count_tokens.handler import AnthropicCountTokensHandler +from litellm.llms.base_llm.base_utils import BaseTokenCounter +from litellm.types.utils import LlmProviders, TokenCountResponse + +# Global handler instance - reuse across all token counting requests +anthropic_count_tokens_handler = AnthropicCountTokensHandler() + + +class AnthropicTokenCounter(BaseTokenCounter): + """Token counter implementation for Anthropic provider using the CountTokens API.""" + + def should_use_token_counting_api( + self, + custom_llm_provider: Optional[str] = None, + ) -> bool: + return custom_llm_provider == LlmProviders.ANTHROPIC.value + + async def count_tokens( + self, + model_to_use: str, + messages: Optional[List[Dict[str, Any]]], + contents: Optional[List[Dict[str, Any]]], + deployment: Optional[Dict[str, Any]] = None, + request_model: str = "", + ) -> Optional[TokenCountResponse]: + """ + Count tokens using Anthropic's CountTokens API. 
+ + Args: + model_to_use: The model identifier + messages: The messages to count tokens for + contents: Alternative content format (not used for Anthropic) + deployment: Deployment configuration containing litellm_params + request_model: The original request model name + + Returns: + TokenCountResponse with token count, or None if counting fails + """ + from litellm.llms.anthropic.common_utils import AnthropicError + + if not messages: + return None + + deployment = deployment or {} + litellm_params = deployment.get("litellm_params", {}) + + # Get Anthropic API key from deployment config or environment + api_key = litellm_params.get("api_key") + if not api_key: + api_key = os.getenv("ANTHROPIC_API_KEY") + + if not api_key: + verbose_logger.warning("No Anthropic API key found for token counting") + return None + + try: + result = await anthropic_count_tokens_handler.handle_count_tokens_request( + model=model_to_use, + messages=messages, + api_key=api_key, + ) + + if result is not None: + return TokenCountResponse( + total_tokens=result.get("input_tokens", 0), + request_model=request_model, + model_used=model_to_use, + tokenizer_type="anthropic_api", + original_response=result, + ) + except AnthropicError as e: + verbose_logger.warning( + f"Anthropic CountTokens API error: status={e.status_code}, message={e.message}" + ) + return TokenCountResponse( + total_tokens=0, + request_model=request_model, + model_used=model_to_use, + tokenizer_type="anthropic_api", + error=True, + error_message=e.message, + status_code=e.status_code, + ) + except Exception as e: + verbose_logger.warning(f"Error calling Anthropic CountTokens API: {e}") + return TokenCountResponse( + total_tokens=0, + request_model=request_model, + model_used=model_to_use, + tokenizer_type="anthropic_api", + error=True, + error_message=str(e), + status_code=500, + ) + + return None diff --git a/litellm/llms/anthropic/count_tokens/transformation.py b/litellm/llms/anthropic/count_tokens/transformation.py new 
file mode 100644 index 00000000000..beffd3beeec --- /dev/null +++ b/litellm/llms/anthropic/count_tokens/transformation.py @@ -0,0 +1,121 @@ +""" +Anthropic CountTokens API transformation logic. + +This module handles the transformation of requests to Anthropic's CountTokens API format. +""" + +from typing import Any, Dict, List + +from litellm.constants import ANTHROPIC_TOKEN_COUNTING_BETA_VERSION + + +class AnthropicCountTokensConfig: + """ + Configuration and transformation logic for Anthropic CountTokens API. + + Anthropic CountTokens API Specification: + - Endpoint: POST https://api.anthropic.com/v1/messages/count_tokens + - Beta header required: anthropic-beta: token-counting-2024-11-01 + - Response: {"input_tokens": } + """ + + def get_anthropic_count_tokens_endpoint(self) -> str: + """ + Get the Anthropic CountTokens API endpoint. + + Returns: + The endpoint URL for the CountTokens API + """ + return "https://api.anthropic.com/v1/messages/count_tokens" + + def transform_request_to_count_tokens( + self, + model: str, + messages: List[Dict[str, Any]], + ) -> Dict[str, Any]: + """ + Transform request to Anthropic CountTokens format. + + Input: + { + "model": "claude-3-5-sonnet-20241022", + "messages": [{"role": "user", "content": "Hello!"}] + } + + Output (Anthropic CountTokens format): + { + "model": "claude-3-5-sonnet-20241022", + "messages": [{"role": "user", "content": "Hello!"}] + } + """ + return { + "model": model, + "messages": messages, + } + + def transform_response(self, response: Dict[str, Any]) -> Dict[str, Any]: + """ + Transform Anthropic CountTokens response. + + Input (Anthropic response): + { + "input_tokens": 123 + } + + Output: + { + "input_tokens": 123 + } + """ + return { + "input_tokens": response.get("input_tokens", 0), + } + + def get_required_headers(self, api_key: str) -> Dict[str, str]: + """ + Get the required headers for the CountTokens API. 
+ + Args: + api_key: The Anthropic API key + + Returns: + Dictionary of required headers + """ + return { + "Content-Type": "application/json", + "x-api-key": api_key, + "anthropic-version": "2023-06-01", + "anthropic-beta": ANTHROPIC_TOKEN_COUNTING_BETA_VERSION, + } + + def validate_request( + self, model: str, messages: List[Dict[str, Any]] + ) -> None: + """ + Validate the incoming count tokens request. + + Args: + model: The model name + messages: The messages to count tokens for + + Raises: + ValueError: If the request is invalid + """ + if not model: + raise ValueError("model parameter is required") + + if not messages: + raise ValueError("messages parameter is required") + + if not isinstance(messages, list): + raise ValueError("messages must be a list") + + for i, message in enumerate(messages): + if not isinstance(message, dict): + raise ValueError(f"Message {i} must be a dictionary") + + if "role" not in message: + raise ValueError(f"Message {i} must have a 'role' field") + + if "content" not in message: + raise ValueError(f"Message {i} must have a 'content' field") diff --git a/litellm/llms/azure_ai/anthropic/count_tokens/__init__.py b/litellm/llms/azure_ai/anthropic/count_tokens/__init__.py new file mode 100644 index 00000000000..9605d401f8e --- /dev/null +++ b/litellm/llms/azure_ai/anthropic/count_tokens/__init__.py @@ -0,0 +1,19 @@ +""" +Azure AI Anthropic CountTokens API implementation. 
+""" + +from litellm.llms.azure_ai.anthropic.count_tokens.handler import ( + AzureAIAnthropicCountTokensHandler, +) +from litellm.llms.azure_ai.anthropic.count_tokens.token_counter import ( + AzureAIAnthropicTokenCounter, +) +from litellm.llms.azure_ai.anthropic.count_tokens.transformation import ( + AzureAIAnthropicCountTokensConfig, +) + +__all__ = [ + "AzureAIAnthropicCountTokensHandler", + "AzureAIAnthropicCountTokensConfig", + "AzureAIAnthropicTokenCounter", +] diff --git a/litellm/llms/azure_ai/anthropic/count_tokens/handler.py b/litellm/llms/azure_ai/anthropic/count_tokens/handler.py new file mode 100644 index 00000000000..099f92144ee --- /dev/null +++ b/litellm/llms/azure_ai/anthropic/count_tokens/handler.py @@ -0,0 +1,131 @@ +""" +Azure AI Anthropic CountTokens API handler. + +Uses httpx for HTTP requests with Azure authentication. +""" + +from typing import Any, Dict, List, Optional, Union + +import httpx + +import litellm +from litellm._logging import verbose_logger +from litellm.llms.anthropic.common_utils import AnthropicError +from litellm.llms.azure_ai.anthropic.count_tokens.transformation import ( + AzureAIAnthropicCountTokensConfig, +) +from litellm.llms.custom_httpx.http_handler import get_async_httpx_client + + +class AzureAIAnthropicCountTokensHandler(AzureAIAnthropicCountTokensConfig): + """ + Handler for Azure AI Anthropic CountTokens API requests. + + Uses httpx for HTTP requests with Azure authentication. + """ + + async def handle_count_tokens_request( + self, + model: str, + messages: List[Dict[str, Any]], + api_key: str, + api_base: str, + litellm_params: Optional[Dict[str, Any]] = None, + timeout: Optional[Union[float, httpx.Timeout]] = None, + ) -> Dict[str, Any]: + """ + Handle a CountTokens request using httpx with Azure authentication. 
+ + Args: + model: The model identifier (e.g., "claude-3-5-sonnet") + messages: The messages to count tokens for + api_key: The Azure AI API key + api_base: The Azure AI API base URL + litellm_params: Optional LiteLLM parameters + timeout: Optional timeout for the request (defaults to litellm.request_timeout) + + Returns: + Dictionary containing token count response + + Raises: + AnthropicError: If the API request fails + """ + try: + # Validate the request + self.validate_request(model, messages) + + verbose_logger.debug( + f"Processing Azure AI Anthropic CountTokens request for model: {model}" + ) + + # Transform request to Anthropic format + request_body = self.transform_request_to_count_tokens( + model=model, + messages=messages, + ) + + verbose_logger.debug(f"Transformed request: {request_body}") + + # Get endpoint URL + endpoint_url = self.get_count_tokens_endpoint(api_base) + + verbose_logger.debug(f"Making request to: {endpoint_url}") + + # Get required headers with Azure authentication + headers = self.get_required_headers( + api_key=api_key, + litellm_params=litellm_params, + ) + + # Use LiteLLM's async httpx client + async_client = get_async_httpx_client( + llm_provider=litellm.LlmProviders.AZURE_AI + ) + + # Use provided timeout or fall back to litellm.request_timeout + request_timeout = timeout if timeout is not None else litellm.request_timeout + + response = await async_client.post( + endpoint_url, + headers=headers, + json=request_body, + timeout=request_timeout, + ) + + verbose_logger.debug(f"Response status: {response.status_code}") + + if response.status_code != 200: + error_text = response.text + verbose_logger.error(f"Azure AI Anthropic API error: {error_text}") + raise AnthropicError( + status_code=response.status_code, + message=error_text, + ) + + azure_response = response.json() + + verbose_logger.debug(f"Azure AI Anthropic response: {azure_response}") + + # Transform response + final_response = self.transform_response(azure_response) + + 
verbose_logger.debug(f"Final response: {final_response}") + + return final_response + + except AnthropicError: + # Re-raise Anthropic exceptions as-is + raise + except httpx.HTTPStatusError as e: + # HTTP errors - preserve the actual status code + verbose_logger.error(f"HTTP error in CountTokens handler: {str(e)}") + raise AnthropicError( + status_code=e.response.status_code, + message=e.response.text, + ) + except Exception as e: + verbose_logger.error(f"Error in CountTokens handler: {str(e)}") + raise AnthropicError( + status_code=500, + message=f"CountTokens processing error: {str(e)}", + ) diff --git a/litellm/llms/azure_ai/anthropic/count_tokens/token_counter.py b/litellm/llms/azure_ai/anthropic/count_tokens/token_counter.py new file mode 100644 index 00000000000..14f92800079 --- /dev/null +++ b/litellm/llms/azure_ai/anthropic/count_tokens/token_counter.py @@ -0,0 +1,119 @@ +""" +Azure AI Anthropic Token Counter implementation using the CountTokens API. +""" + +import os +from typing import Any, Dict, List, Optional + +from litellm._logging import verbose_logger +from litellm.llms.azure_ai.anthropic.count_tokens.handler import ( + AzureAIAnthropicCountTokensHandler, +) +from litellm.llms.base_llm.base_utils import BaseTokenCounter +from litellm.types.utils import LlmProviders, TokenCountResponse + +# Global handler instance - reuse across all token counting requests +azure_ai_anthropic_count_tokens_handler = AzureAIAnthropicCountTokensHandler() + + +class AzureAIAnthropicTokenCounter(BaseTokenCounter): + """Token counter implementation for Azure AI Anthropic provider using the CountTokens API.""" + + def should_use_token_counting_api( + self, + custom_llm_provider: Optional[str] = None, + ) -> bool: + return custom_llm_provider == LlmProviders.AZURE_AI.value + + async def count_tokens( + self, + model_to_use: str, + messages: Optional[List[Dict[str, Any]]], + contents: Optional[List[Dict[str, Any]]], + deployment: Optional[Dict[str, Any]] = None, + 
request_model: str = "", + ) -> Optional[TokenCountResponse]: + """ + Count tokens using Azure AI Anthropic's CountTokens API. + + Args: + model_to_use: The model identifier + messages: The messages to count tokens for + contents: Alternative content format (not used for Anthropic) + deployment: Deployment configuration containing litellm_params + request_model: The original request model name + + Returns: + TokenCountResponse with token count, or None if counting fails + """ + from litellm.llms.anthropic.common_utils import AnthropicError + + if not messages: + return None + + deployment = deployment or {} + litellm_params = deployment.get("litellm_params", {}) + + # Get Azure AI API key from deployment config or environment + api_key = litellm_params.get("api_key") + if not api_key: + api_key = os.getenv("AZURE_AI_API_KEY") + + # Get API base from deployment config or environment + api_base = litellm_params.get("api_base") + if not api_base: + api_base = os.getenv("AZURE_AI_API_BASE") + + if not api_key: + verbose_logger.warning("No Azure AI API key found for token counting") + return None + + if not api_base: + verbose_logger.warning("No Azure AI API base found for token counting") + return None + + try: + result = await azure_ai_anthropic_count_tokens_handler.handle_count_tokens_request( + model=model_to_use, + messages=messages, + api_key=api_key, + api_base=api_base, + litellm_params=litellm_params, + ) + + if result is not None: + return TokenCountResponse( + total_tokens=result.get("input_tokens", 0), + request_model=request_model, + model_used=model_to_use, + tokenizer_type="azure_ai_anthropic_api", + original_response=result, + ) + except AnthropicError as e: + verbose_logger.warning( + f"Azure AI Anthropic CountTokens API error: status={e.status_code}, message={e.message}" + ) + return TokenCountResponse( + total_tokens=0, + request_model=request_model, + model_used=model_to_use, + tokenizer_type="azure_ai_anthropic_api", + error=True, + 
error_message=e.message, + status_code=e.status_code, + ) + except Exception as e: + verbose_logger.warning( + f"Error calling Azure AI Anthropic CountTokens API: {e}" + ) + return TokenCountResponse( + total_tokens=0, + request_model=request_model, + model_used=model_to_use, + tokenizer_type="azure_ai_anthropic_api", + error=True, + error_message=str(e), + status_code=500, + ) + + return None diff --git a/litellm/llms/azure_ai/anthropic/count_tokens/transformation.py b/litellm/llms/azure_ai/anthropic/count_tokens/transformation.py new file mode 100644 index 00000000000..e284595cc8a --- /dev/null +++ b/litellm/llms/azure_ai/anthropic/count_tokens/transformation.py @@ -0,0 +1,88 @@ +""" +Azure AI Anthropic CountTokens API transformation logic. + +Extends the base Anthropic CountTokens transformation with Azure authentication. +""" + +from typing import Any, Dict, Optional + +from litellm.constants import ANTHROPIC_TOKEN_COUNTING_BETA_VERSION +from litellm.llms.anthropic.count_tokens.transformation import ( + AnthropicCountTokensConfig, +) +from litellm.llms.azure.common_utils import BaseAzureLLM +from litellm.types.router import GenericLiteLLMParams + + +class AzureAIAnthropicCountTokensConfig(AnthropicCountTokensConfig): + """ + Configuration and transformation logic for Azure AI Anthropic CountTokens API. + + Extends AnthropicCountTokensConfig with Azure authentication. + Azure AI Anthropic uses the same endpoint format but with Azure auth headers. + """ + + def get_required_headers( + self, + api_key: str, + litellm_params: Optional[Dict[str, Any]] = None, + ) -> Dict[str, str]: + """ + Get the required headers for the Azure AI Anthropic CountTokens API. + + Uses Azure authentication (api-key header) instead of Anthropic's x-api-key. 
+ + Args: + api_key: The Azure AI API key + litellm_params: Optional LiteLLM parameters for additional auth config + + Returns: + Dictionary of required headers with Azure authentication + """ + # Start with base headers + headers = { + "Content-Type": "application/json", + "anthropic-version": "2023-06-01", + "anthropic-beta": ANTHROPIC_TOKEN_COUNTING_BETA_VERSION, + } + + # Use Azure authentication + litellm_params = litellm_params or {} + if "api_key" not in litellm_params: + litellm_params["api_key"] = api_key + + litellm_params_obj = GenericLiteLLMParams(**litellm_params) + + # Get Azure auth headers + azure_headers = BaseAzureLLM._base_validate_azure_environment( + headers={}, litellm_params=litellm_params_obj + ) + + # Merge Azure auth headers + headers.update(azure_headers) + + return headers + + def get_count_tokens_endpoint(self, api_base: str) -> str: + """ + Get the Azure AI Anthropic CountTokens API endpoint. + + Args: + api_base: The Azure AI API base URL + (e.g., https://my-resource.services.ai.azure.com or + https://my-resource.services.ai.azure.com/anthropic) + + Returns: + The endpoint URL for the CountTokens API + """ + # Azure AI Anthropic endpoint format: + # https://.services.ai.azure.com/anthropic/v1/messages/count_tokens + api_base = api_base.rstrip("/") + + # Ensure the URL has /anthropic path + if not api_base.endswith("/anthropic"): + if "/anthropic" not in api_base: + api_base = f"{api_base}/anthropic" + + # Add the count_tokens path + return f"{api_base}/v1/messages/count_tokens" diff --git a/litellm/llms/azure_ai/common_utils.py b/litellm/llms/azure_ai/common_utils.py index 1a88926ae39..281e0579c77 100644 --- a/litellm/llms/azure_ai/common_utils.py +++ b/litellm/llms/azure_ai/common_utils.py @@ -4,123 +4,6 @@ from litellm.llms.base_llm.base_utils import BaseLLMModelInfo, BaseTokenCounter from litellm.secret_managers.main import get_secret_str from litellm.types.llms.openai import AllMessageValues -from litellm.types.utils import 
TokenCountResponse - - -class AzureAIAnthropicTokenCounter(BaseTokenCounter): - """Token counter implementation for Azure AI Anthropic provider using the CountTokens API.""" - - def should_use_token_counting_api( - self, - custom_llm_provider: Optional[str] = None, - ) -> bool: - from litellm.types.utils import LlmProviders - - return custom_llm_provider == LlmProviders.AZURE_AI.value - - async def count_tokens( - self, - model_to_use: str, - messages: Optional[List[Dict[str, Any]]], - contents: Optional[List[Dict[str, Any]]], - deployment: Optional[Dict[str, Any]] = None, - request_model: str = "", - ) -> Optional[TokenCountResponse]: - """ - Count tokens using Azure AI Anthropic's CountTokens API. - - Args: - model_to_use: The model identifier - messages: The messages to count tokens for - contents: Alternative content format (not used for Anthropic) - deployment: Deployment configuration containing litellm_params - request_model: The original request model name - - Returns: - TokenCountResponse with token count, or None if counting fails - """ - import os - - from litellm._logging import verbose_logger - from litellm.llms.anthropic.common_utils import AnthropicError - from litellm.llms.azure_ai.anthropic.count_tokens.handler import ( - AzureAIAnthropicCountTokensHandler, - ) - - if not messages: - return None - - deployment = deployment or {} - litellm_params = deployment.get("litellm_params", {}) - - # Get Azure AI API key from deployment config or environment - api_key = litellm_params.get("api_key") - if not api_key: - api_key = os.getenv("AZURE_AI_API_KEY") - - # Get API base from deployment config or environment - api_base = litellm_params.get("api_base") - if not api_base: - api_base = os.getenv("AZURE_AI_API_BASE") - - if not api_key: - verbose_logger.warning( - "No Azure AI API key found for token counting" - ) - return None - - if not api_base: - verbose_logger.warning( - "No Azure AI API base found for token counting" - ) - return None - - try: - 
handler = AzureAIAnthropicCountTokensHandler() - result = await handler.handle_count_tokens_request( - model=model_to_use, - messages=messages, - api_key=api_key, - api_base=api_base, - litellm_params=litellm_params, - ) - - if result is not None: - return TokenCountResponse( - total_tokens=result.get("input_tokens", 0), - request_model=request_model, - model_used=model_to_use, - tokenizer_type="azure_ai_anthropic_api", - original_response=result, - ) - except AnthropicError as e: - verbose_logger.warning( - f"Azure AI Anthropic CountTokens API error: status={e.status_code}, message={e.message}" - ) - return TokenCountResponse( - total_tokens=0, - request_model=request_model, - model_used=model_to_use, - tokenizer_type="azure_ai_anthropic_api", - error=True, - error_message=e.message, - status_code=e.status_code, - ) - except Exception as e: - verbose_logger.warning( - f"Error calling Azure AI Anthropic CountTokens API: {e}" - ) - return TokenCountResponse( - total_tokens=0, - request_model=request_model, - model_used=model_to_use, - tokenizer_type="azure_ai_anthropic_api", - error=True, - error_message=str(e), - status_code=500, - ) - - return None class AzureFoundryModelInfo(BaseLLMModelInfo): @@ -142,9 +25,7 @@ def get_azure_ai_route(model: str) -> Literal["agents", "default"]: @staticmethod def get_api_base(api_base: Optional[str] = None) -> Optional[str]: - return ( - api_base or litellm.api_base or get_secret_str("AZURE_AI_API_BASE") - ) + return api_base or litellm.api_base or get_secret_str("AZURE_AI_API_BASE") @staticmethod def get_api_key(api_key: Optional[str] = None) -> Optional[str]: @@ -171,6 +52,10 @@ def get_token_counter(self) -> Optional[BaseTokenCounter]: """ # Only return token counter for Claude models if self._model and "claude" in self._model.lower(): + from litellm.llms.azure_ai.anthropic.count_tokens.token_counter import ( + AzureAIAnthropicTokenCounter, + ) + return AzureAIAnthropicTokenCounter() return None From 
f8ee2f1a0334e5779957b8211125f258dd92a589 Mon Sep 17 00:00:00 2001 From: Ishaan Jaffer Date: Tue, 20 Jan 2026 12:44:29 -0800 Subject: [PATCH 07/13] fix: fix pyrightI --- litellm/llms/azure_ai/common_utils.py | 11 +++++++++++ litellm/utils.py | 4 ++++ 2 files changed, 15 insertions(+) diff --git a/litellm/llms/azure_ai/common_utils.py b/litellm/llms/azure_ai/common_utils.py index 281e0579c77..2c711c36a0f 100644 --- a/litellm/llms/azure_ai/common_utils.py +++ b/litellm/llms/azure_ai/common_utils.py @@ -59,6 +59,17 @@ def get_token_counter(self) -> Optional[BaseTokenCounter]: return AzureAIAnthropicTokenCounter() return None + def get_models( + self, api_key: Optional[str] = None, api_base: Optional[str] = None + ) -> List[str]: + """ + Returns a list of models supported by Azure AI. + + Azure AI doesn't have a standard model listing endpoint, + so this returns an empty list. + """ + return [] + ######################################################### # Not implemented methods ######################################################### diff --git a/litellm/utils.py b/litellm/utils.py index 3e88c6fe9e3..948d6e97f4d 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -8244,6 +8244,10 @@ def get_provider_model_info( return litellm.ClarifaiConfig() elif LlmProviders.BEDROCK == provider: return litellm.llms.bedrock.common_utils.BedrockModelInfo() + elif LlmProviders.AZURE_AI == provider: + from litellm.llms.azure_ai.common_utils import AzureFoundryModelInfo + + return AzureFoundryModelInfo(model=model) return None @staticmethod From d4ca8b803bfff88e2290a5e0a7583761fe611426 Mon Sep 17 00:00:00 2001 From: Ishaan Jaffer Date: Tue, 20 Jan 2026 13:30:16 -0800 Subject: [PATCH 08/13] fix Code QA issues --- litellm/llms/anthropic/common_utils.py | 3 +- litellm/llms/azure_ai/common_utils.py | 2 +- litellm/proxy/proxy_config.yaml | 58 ++++++++------------------ 3 files changed, 19 insertions(+), 44 deletions(-) diff --git a/litellm/llms/anthropic/common_utils.py 
b/litellm/llms/anthropic/common_utils.py index 3a8da2a7fd6..ccfbc36c080 100644 --- a/litellm/llms/anthropic/common_utils.py +++ b/litellm/llms/anthropic/common_utils.py @@ -2,7 +2,7 @@ This file contains common utils for anthropic calls. """ -from typing import Any, Dict, List, Optional, Union +from typing import Dict, List, Optional, Union import httpx @@ -18,7 +18,6 @@ AnthropicMcpServerTool, ) from litellm.types.llms.openai import AllMessageValues -from litellm.types.utils import TokenCountResponse class AnthropicError(BaseLLMException): diff --git a/litellm/llms/azure_ai/common_utils.py b/litellm/llms/azure_ai/common_utils.py index 2c711c36a0f..01a3f5766c6 100644 --- a/litellm/llms/azure_ai/common_utils.py +++ b/litellm/llms/azure_ai/common_utils.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Literal, Optional +from typing import List, Literal, Optional import litellm from litellm.llms.base_llm.base_utils import BaseLLMModelInfo, BaseTokenCounter diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 646a062b720..958ddbf613c 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -1,51 +1,27 @@ model_list: - - model_name: gemini/* + # Anthropic direct + - model_name: anthropic-claude litellm_params: - model: gemini/* - - model_name: -claude-sonnet-4-5-20250929 - litellm_params: - model: bedrock/invoke/us.anthropic.claude-sonnet-4-5-20250929-v1:0 - model_info: - cache_creation_input_token_cost: 3.75e-06 - cache_read_input_token_cost: 3e-07 - input_cost_per_token: 3e-06 - input_cost_per_token_above_200k_tokens: 6e-06 - output_cost_per_token_above_200k_tokens: 2.25e-05 - cache_creation_input_token_cost_above_200k_tokens: 7.5e-06 - cache_read_input_token_cost_above_200k_tokens: 6e-07 - litellm_provider: bedrock_converse - max_input_tokens: 200000 - max_output_tokens: 64000 - max_tokens: 200000 - mode: chat - output_cost_per_token: 1.5e-05 - search_context_cost_per_query: - 
search_context_size_high: 0.01 - search_context_size_low: 0.01 - search_context_size_medium: 0.01 - supports_assistant_prefill: true - supports_computer_use: true - supports_function_calling: true - supports_pdf_input: true - supports_prompt_caching: true - supports_reasoning: true - supports_response_schema: true - supports_tool_choice: true - supports_vision: true - tool_use_system_prompt_tokens: 346 + model: anthropic/claude-sonnet-4-20250514 + api_key: os.environ/ANTHROPIC_API_KEY - - model_name: us.anthropic.claude-sonnet-4-20250514-v1:0 + # Azure AI Anthropic + - model_name: azure-ai-claude litellm_params: - model: bedrock/converse/us.anthropic.claude-sonnet-4-20250514-v1:0 - model_info: - litellm_provider: bedrock_converse - mode: chat - - model_name: claude-sonnet-4-5-20250929 + model: azure_ai/claude-3-5-sonnet + api_base: https://krish-mh44t553-eastus2.services.ai.azure.com/ + api_key: os.environ/AZURE_ANTHROPIC_API_KEY + + # Azure AI Anthropic (alternate endpoint format) + - model_name: claude-4.5-haiku litellm_params: - model: azure_ai/claude-opus-4-5 - api_base: https://krish-mh44t553-eastus2.services.ai.azure.com + model: anthropic/claude-haiku-4-5 + api_base: https://krish-mh44t553-eastus2.services.ai.azure.com/anthropic/v1/messages + api_version: "2023-06-01" api_key: os.environ/AZURE_ANTHROPIC_API_KEY + + # Search Tools Configuration - Define search providers for WebSearch interception # search_tools: # - search_tool_name: "my-perplexity-search" From 6b568cfed4f0387409c2d839b7bbda01fa4f874a Mon Sep 17 00:00:00 2001 From: Ishaan Jaffer Date: Tue, 20 Jan 2026 16:37:39 -0800 Subject: [PATCH 09/13] feat: add OAUTH handling ant --- litellm/types/llms/anthropic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/litellm/types/llms/anthropic.py b/litellm/types/llms/anthropic.py index 779a6950d92..0e687be660f 100644 --- a/litellm/types/llms/anthropic.py +++ b/litellm/types/llms/anthropic.py @@ -642,4 +642,8 @@ class ANTHROPIC_BETA_HEADER_VALUES(str, 
# OAuth constants
# An Authorization value of "Bearer <token>" whose token starts with this
# prefix is treated as an Anthropic OAuth token by the helper below.
ANTHROPIC_OAUTH_TOKEN_PREFIX = "sk-ant-oat"
# Flag placed in the `anthropic-beta` header when an OAuth token is detected.
ANTHROPIC_OAUTH_BETA_HEADER = "oauth-2025-04-20"


def optionally_handle_anthropic_oauth(
    headers: dict, api_key: Optional[str]
) -> tuple[dict, Optional[str]]:
    """
    Handle Anthropic OAuth token detection and header setup.

    If the Authorization header carries ``Bearer <token>`` and the token starts
    with ``ANTHROPIC_OAUTH_TOKEN_PREFIX``, the token is returned as the API key
    and the OAuth headers are set on ``headers`` (the dict is updated in place
    and also returned). Otherwise both inputs are returned unchanged.

    Args:
        headers: Request headers dict. The Authorization header is matched
            case-insensitively; the exact key ``"authorization"`` wins if present.
        api_key: Current API key (may be None)

    Returns:
        Tuple of (updated headers, api_key)
    """
    bearer_scheme = "Bearer "

    # Exact lowercase key first (the original lookup); then fall back to a
    # case-insensitive scan, since HTTP header names are case-insensitive and
    # callers may pass "Authorization".
    auth_header = headers.get("authorization")
    if not auth_header:
        auth_header = next(
            (
                value
                for name, value in headers.items()
                if isinstance(name, str) and name.lower() == "authorization"
            ),
            None,
        )

    if not isinstance(auth_header, str) or not auth_header.startswith(
        bearer_scheme + ANTHROPIC_OAUTH_TOKEN_PREFIX
    ):
        # Not an OAuth bearer token: leave headers and key untouched.
        return headers, api_key

    # Drop only the leading scheme; str.replace would also strip any later
    # "Bearer " substring inside the value.
    api_key = auth_header[len(bearer_scheme):].strip()

    # Merge with any beta flags the caller already supplied (comma-separated)
    # instead of overwriting them, and avoid adding the OAuth flag twice.
    existing_beta = headers.get("anthropic-beta")
    betas = (
        [flag.strip() for flag in str(existing_beta).split(",") if flag.strip()]
        if existing_beta
        else []
    )
    if ANTHROPIC_OAUTH_BETA_HEADER not in betas:
        betas.append(ANTHROPIC_OAUTH_BETA_HEADER)
    headers["anthropic-beta"] = ",".join(betas)

    headers["anthropic-dangerous-direct-browser-access"] = "true"
    return headers, api_key
"""
Unit tests for Anthropic OAuth token handling (Claude Code Max integration).
"""

import os
import sys

# Add litellm to path
sys.path.insert(0, os.path.abspath("../../../../.."))

# Fake OAuth token for testing (not a real secret)
FAKE_OAUTH_TOKEN = "sk-ant-oat01-fake-token-for-testing-123456789abcdef"


def _oauth_request_headers():
    """Build a fresh headers dict carrying the fake OAuth bearer token."""
    return {"authorization": f"Bearer {FAKE_OAUTH_TOKEN}"}


def test_oauth_detection_in_common_utils():
    """Test 1: the helper extracts the OAuth token and sets both OAuth headers."""
    from litellm.llms.anthropic.common_utils import optionally_handle_anthropic_oauth

    result_headers, result_key = optionally_handle_anthropic_oauth(
        _oauth_request_headers(), None
    )

    assert result_key == FAKE_OAUTH_TOKEN
    assert result_headers["anthropic-beta"] == "oauth-2025-04-20"
    assert result_headers["anthropic-dangerous-direct-browser-access"] == "true"


def test_oauth_integration_in_validate_environment():
    """Test 2: AnthropicModelInfo.validate_environment picks up the OAuth token."""
    from litellm.llms.anthropic.common_utils import AnthropicModelInfo

    result_headers = AnthropicModelInfo().validate_environment(
        headers=_oauth_request_headers(),
        model="claude-3-haiku-20240307",
        messages=[{"role": "user", "content": "Hello"}],
        optional_params={},
        litellm_params={},
        api_key=None,
        api_base=None,
    )

    assert result_headers["x-api-key"] == FAKE_OAUTH_TOKEN
    assert result_headers["anthropic-dangerous-direct-browser-access"] == "true"


def test_oauth_detection_in_messages_transformation():
    """Test 3: the messages pass-through config picks up the OAuth token."""
    from litellm.llms.anthropic.experimental_pass_through.messages.transformation import (
        AnthropicMessagesConfig,
    )

    messages_config = AnthropicMessagesConfig()
    result_headers, _ = messages_config.validate_anthropic_messages_environment(
        headers=_oauth_request_headers(),
        model="claude-3-haiku-20240307",
        messages=[{"role": "user", "content": "Hello"}],
        optional_params={},
        litellm_params={},
        api_key=None,
        api_base=None,
    )

    assert result_headers["x-api-key"] == FAKE_OAUTH_TOKEN
    assert "oauth-2025-04-20" in result_headers["anthropic-beta"]
    assert result_headers["anthropic-dangerous-direct-browser-access"] == "true"


def test_regular_api_keys_still_work():
    """Test 4: a non-OAuth key passes through untouched (regression test)."""
    from litellm.llms.anthropic.common_utils import optionally_handle_anthropic_oauth

    regular_key = "sk-ant-api03-regular-key-123"
    request_headers = {"authorization": f"Bearer {regular_key}"}

    result_headers, result_key = optionally_handle_anthropic_oauth(
        request_headers, regular_key
    )

    # Regular key should be unchanged
    assert result_key == regular_key
    # OAuth headers should NOT be added
    assert "anthropic-dangerous-direct-browser-access" not in result_headers
b/docs/my-website/docs/proxy/config_settings.md @@ -397,6 +397,7 @@ router_settings: | AUDIO_SPEECH_CHUNK_SIZE | Chunk size for audio speech processing. Default is 1024 | ANTHROPIC_API_KEY | API key for Anthropic service | ANTHROPIC_API_BASE | Base URL for Anthropic API. Default is https://api.anthropic.com +| ANTHROPIC_TOKEN_COUNTING_BETA_VERSION | Beta version header for Anthropic token counting API. Default is `token-counting-2024-11-01` | AWS_ACCESS_KEY_ID | Access Key ID for AWS services | AWS_BATCH_ROLE_ARN | ARN of the AWS IAM role for batch operations | AWS_DEFAULT_REGION | Default AWS region for service interactions when AWS_REGION is not set @@ -412,6 +413,8 @@ router_settings: | AWS_WEB_IDENTITY_TOKEN | Web identity token for AWS | AWS_WEB_IDENTITY_TOKEN_FILE | Path to file containing web identity token for AWS | AZURE_API_VERSION | Version of the Azure API being used +| AZURE_AI_API_BASE | Base URL for Azure AI services (e.g., Azure AI Anthropic) +| AZURE_AI_API_KEY | API key for Azure AI services (e.g., Azure AI Anthropic) | AZURE_AUTHORITY_HOST | Azure authority host URL | AZURE_CERTIFICATE_PASSWORD | Password for Azure OpenAI certificate | AZURE_CLIENT_ID | Client ID for Azure services