From d0c82c7eba96b0adc714ecd9e6211b9fce5e9c2f Mon Sep 17 00:00:00 2001
From: Ishaan Jaffer <ishaanjaffer0324@gmail.com>
Date: Tue, 20 Jan 2026 11:58:17 -0800
Subject: [PATCH 01/13] fix count_tokens_with_anthropic_api

---
 litellm/proxy/utils.py | 71 +-----------------------------------------
 1 file changed, 1 insertion(+), 70 deletions(-)

diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index fcb678ef02a..e0855333e26 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -131,7 +131,6 @@
 
 unified_guardrail = UnifiedLLMGuardrails()
 
-_anthropic_async_clients = {}
 
 def print_verbose(print_statement):
     """
@@ -961,8 +960,8 @@ async def _process_guardrail_callback(
         Returns:
             Updated data dictionary if guardrail passes, None if guardrail should be skipped
         """
-        from litellm.types.guardrails import GuardrailEventHooks
         from litellm.integrations.prometheus import PrometheusLogger
+        from litellm.types.guardrails import GuardrailEventHooks
 
         # Determine the event type based on call type
         event_type = GuardrailEventHooks.pre_call
@@ -4292,74 +4291,6 @@ def construct_database_url_from_env_vars() -> Optional[str]:
     return None
 
 
-async def count_tokens_with_anthropic_api(
-    model_to_use: str,
-    messages: Optional[List[Dict[str, Any]]],
-    deployment: Optional[Dict[str, Any]] = None,
-) -> Optional[Dict[str, Any]]:
-    """
-    Helper function to count tokens using Anthropic API directly.
-
-    Args:
-        model_to_use: The model name to use for token counting
-        messages: The messages to count tokens for
-        deployment: Optional deployment configuration containing API key
-
-    Returns:
-        Optional dict with token count and tokenizer info, or None if failed
-    """
-    if not messages:
-        return None
-
-    try:
-        import os
-
-        import anthropic
-
-        # Get Anthropic API key from deployment config
-        anthropic_api_key = None
-        if deployment is not None:
-            anthropic_api_key = deployment.get("litellm_params", {}).get("api_key")
-
-        # Fallback to environment variable
-        if not anthropic_api_key:
-            anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
-
-        if anthropic_api_key and messages:
-            # Call Anthropic API directly for more accurate token counting
-            
-            # Use cached client if available to avoid socket exhaustion
-            if anthropic_api_key not in _anthropic_async_clients:
-                _anthropic_async_clients[anthropic_api_key] = anthropic.AsyncAnthropic(api_key=anthropic_api_key)
-            
-            client = _anthropic_async_clients[anthropic_api_key]
-
-            # Call with explicit parameters to satisfy type checking
-            # Type ignore for now since messages come from generic dict input
-            response = await client.beta.messages.count_tokens(
-                model=model_to_use,
-                messages=messages,  # type: ignore
-                betas=["token-counting-2024-11-01"],
-            )
-            total_tokens = response.input_tokens
-            tokenizer_used = "anthropic_api"
-
-            return {
-                "total_tokens": total_tokens,
-                "tokenizer_used": tokenizer_used,
-            }
-
-    except ImportError:
-        verbose_proxy_logger.warning(
-            "Anthropic library not available, falling back to LiteLLM tokenizer"
-        )
-    except Exception as e:
-        verbose_proxy_logger.warning(
-            f"Error calling Anthropic API: {e}, falling back to LiteLLM tokenizer"
-        )
-    return None
-
-
 async def get_available_models_for_user(
     user_api_key_dict: "UserAPIKeyAuth",
     llm_router: Optional["Router"],

From 10a6a8c3ee08fe7f125a08e24c87230fd97b7acf Mon Sep 17 00:00:00 2001
From: Ishaan Jaffer <ishaanjaffer0324@gmail.com>
Date: Tue, 20 Jan 2026 12:13:55 -0800
Subject: [PATCH 02/13] remove outdated file

---
 tests/test_litellm/test_utils_custom.py | 45 -------------------------
 1 file changed, 45 deletions(-)
 delete mode 100644 tests/test_litellm/test_utils_custom.py

diff --git a/tests/test_litellm/test_utils_custom.py b/tests/test_litellm/test_utils_custom.py
deleted file mode 100644
index 3e924e9c719..00000000000
--- a/tests/test_litellm/test_utils_custom.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import pytest
-import sys
-from unittest.mock import MagicMock, patch, AsyncMock
-from litellm.proxy.utils import count_tokens_with_anthropic_api, _anthropic_async_clients
-
-@pytest.mark.asyncio
-async def test_count_tokens_caching():
-    """
-    Test that count_tokens_with_anthropic_api caches the client.
-    """
-    # Clear cache
-    _anthropic_async_clients.clear()
-    
-    api_key = "sk-ant-test-key"
-    messages = [{"role": "user", "content": "hello"}]
-    model = "claude-3-opus-20240229"
-    
-    # Create a mock anthropic module
-    mock_anthropic = MagicMock()
-    mock_client = MagicMock()
-    mock_anthropic.AsyncAnthropic.return_value = mock_client
-    
-    # Mock response
-    mock_response = MagicMock()
-    mock_response.input_tokens = 10
-    
-    # Setup async return for count_tokens
-    mock_client.beta.messages.count_tokens = AsyncMock(return_value=mock_response)
-    
-    # Patch sys.modules to ensure our mock is used when anthropic is imported
-    with patch.dict(sys.modules, {"anthropic": mock_anthropic}):
-        # First call
-        with patch.dict("os.environ", {"ANTHROPIC_API_KEY": api_key}):
-            await count_tokens_with_anthropic_api(model, messages)
-            
-        assert api_key in _anthropic_async_clients
-        assert _anthropic_async_clients[api_key] == mock_client
-        mock_anthropic.AsyncAnthropic.assert_called_once() # Should be called once
-        
-        # Second call
-        with patch.dict("os.environ", {"ANTHROPIC_API_KEY": api_key}):
-            await count_tokens_with_anthropic_api(model, messages)
-            
-        # Should still be called once (cached)
-        mock_anthropic.AsyncAnthropic.assert_called_once()

From fa5b07d6a65d08dbaabf345fc2603158d8fac91f Mon Sep 17 00:00:00 2001
From: Ishaan Jaffer <ishaanjaffer0324@gmail.com>
Date: Tue, 20 Jan 2026 12:19:36 -0800
Subject: [PATCH 03/13] fix ANTHROPIC_TOKEN_COUNTING_BETA_VERSION

---
 litellm/constants.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/litellm/constants.py b/litellm/constants.py
index c98551fb1b6..e142c7d6304 100644
--- a/litellm/constants.py
+++ b/litellm/constants.py
@@ -323,6 +323,9 @@
 EMAIL_BUDGET_ALERT_MAX_SPEND_ALERT_PERCENTAGE = float(os.getenv("EMAIL_BUDGET_ALERT_MAX_SPEND_ALERT_PERCENTAGE", 0.8))  # 80% of max budget
 ############### LLM Provider Constants ###############
 ### ANTHROPIC CONSTANTS ###
+ANTHROPIC_TOKEN_COUNTING_BETA_VERSION = os.getenv(
+    "ANTHROPIC_TOKEN_COUNTING_BETA_VERSION", "token-counting-2024-11-01"
+)
 ANTHROPIC_SKILLS_API_BETA_VERSION = "skills-2025-10-02"
 ANTHROPIC_WEB_SEARCH_TOOL_MAX_USES = {
     "low": 1,

From 2f3f26a7323355e9011b80fda91a207b94ef4f0f Mon Sep 17 00:00:00 2001
From: Ishaan Jaffer <ishaanjaffer0324@gmail.com>
Date: Tue, 20 Jan 2026 12:21:03 -0800
Subject: [PATCH 04/13] refactor: get_token_counter

---
 litellm/llms/anthropic/common_utils.py |  42 +-----
 litellm/llms/azure_ai/common_utils.py  | 169 ++++++++++++++++++++++---
 2 files changed, 154 insertions(+), 57 deletions(-)

diff --git a/litellm/llms/anthropic/common_utils.py b/litellm/llms/anthropic/common_utils.py
index fcbe9823ed4..3a8da2a7fd6 100644
--- a/litellm/llms/anthropic/common_utils.py
+++ b/litellm/llms/anthropic/common_utils.py
@@ -476,45 +476,11 @@ def get_token_counter(self) -> Optional[BaseTokenCounter]:
         Returns:
             AnthropicTokenCounter instance for this provider.
         """
-        return AnthropicTokenCounter()
-
-
-class AnthropicTokenCounter(BaseTokenCounter):
-    """Token counter implementation for Anthropic provider."""
-
-    def should_use_token_counting_api(
-        self, 
-        custom_llm_provider: Optional[str] = None,
-    ) -> bool:
-        from litellm.types.utils import LlmProviders
-        return custom_llm_provider == LlmProviders.ANTHROPIC.value
-    
-    async def count_tokens(
-        self,
-        model_to_use: str,
-        messages: Optional[List[Dict[str, Any]]],
-        contents: Optional[List[Dict[str, Any]]],
-        deployment: Optional[Dict[str, Any]] = None,
-        request_model: str = "",
-    ) -> Optional[TokenCountResponse]:
-        from litellm.proxy.utils import count_tokens_with_anthropic_api
-        
-        result = await count_tokens_with_anthropic_api(
-            model_to_use=model_to_use,
-            messages=messages,
-            deployment=deployment,
+        from litellm.llms.anthropic.count_tokens.token_counter import (
+            AnthropicTokenCounter,
         )
-        
-        if result is not None:
-            return TokenCountResponse(
-                total_tokens=result.get("total_tokens", 0),
-                request_model=request_model,
-                model_used=model_to_use,
-                tokenizer_type=result.get("tokenizer_used", ""),
-                original_response=result,
-            )
-        
-        return None
+
+        return AnthropicTokenCounter()
 
 
 def process_anthropic_headers(headers: Union[httpx.Headers, dict]) -> dict:
diff --git a/litellm/llms/azure_ai/common_utils.py b/litellm/llms/azure_ai/common_utils.py
index 9487c7f83f2..1a88926ae39 100644
--- a/litellm/llms/azure_ai/common_utils.py
+++ b/litellm/llms/azure_ai/common_utils.py
@@ -1,17 +1,139 @@
-from typing import List, Literal, Optional
+from typing import Any, Dict, List, Literal, Optional
 
 import litellm
-from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
+from litellm.llms.base_llm.base_utils import BaseLLMModelInfo, BaseTokenCounter
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import AllMessageValues
+from litellm.types.utils import TokenCountResponse
+
+
+class AzureAIAnthropicTokenCounter(BaseTokenCounter):
+    """Token counter implementation for Azure AI Anthropic provider using the CountTokens API.
+
+    Failures of the CountTokens call are returned as an error TokenCountResponse, not raised.
+    """
+
+    def should_use_token_counting_api(
+        self,
+        custom_llm_provider: Optional[str] = None,
+    ) -> bool:
+        """Return True only when ``custom_llm_provider`` is the ``azure_ai`` provider."""
+        from litellm.types.utils import LlmProviders
+
+        return custom_llm_provider == LlmProviders.AZURE_AI.value
+
+    async def count_tokens(
+        self,
+        model_to_use: str,
+        messages: Optional[List[Dict[str, Any]]],
+        contents: Optional[List[Dict[str, Any]]],
+        deployment: Optional[Dict[str, Any]] = None,
+        request_model: str = "",
+    ) -> Optional[TokenCountResponse]:
+        """
+        Count tokens using Azure AI Anthropic's CountTokens API.
+
+        Args:
+            model_to_use: The model identifier
+            messages: The messages to count tokens for
+            contents: Alternative content format (not used for Anthropic)
+            deployment: Deployment configuration containing litellm_params
+            request_model: The original request model name
+
+        Returns:
+            TokenCountResponse with the token count on success.
+            TokenCountResponse with error=True (and total_tokens=0) if the API call raises.
+            None when messages, the API key or the API base are missing, or when
+            the handler returns no result.
+        """
+        import os
+
+        from litellm._logging import verbose_logger
+        from litellm.llms.anthropic.common_utils import AnthropicError
+        from litellm.llms.azure_ai.anthropic.count_tokens.handler import (
+            AzureAIAnthropicCountTokensHandler,
+        )
+
+        if not messages:
+            return None
+
+        # `or {}` also covers a deployment whose "litellm_params" is explicitly
+        # None, which would otherwise break the .get() calls below.
+        litellm_params = (deployment or {}).get("litellm_params") or {}
+
+        # Get Azure AI API key from deployment config or environment
+        api_key = litellm_params.get("api_key") or os.getenv("AZURE_AI_API_KEY")
+
+        # Get API base from deployment config or environment
+        api_base = litellm_params.get("api_base") or os.getenv("AZURE_AI_API_BASE")
+
+        if not api_key:
+            verbose_logger.warning(
+                "No Azure AI API key found for token counting"
+            )
+            return None
+
+        if not api_base:
+            verbose_logger.warning(
+                "No Azure AI API base found for token counting"
+            )
+            return None
+
+        def _error_response(message: str, status_code: int) -> TokenCountResponse:
+            # Shared by both except branches so the error payloads cannot drift.
+            return TokenCountResponse(
+                total_tokens=0,
+                request_model=request_model,
+                model_used=model_to_use,
+                tokenizer_type="azure_ai_anthropic_api",
+                error=True,
+                error_message=message,
+                status_code=status_code,
+            )
+
+        try:
+            handler = AzureAIAnthropicCountTokensHandler()
+            result = await handler.handle_count_tokens_request(
+                model=model_to_use,
+                messages=messages,
+                api_key=api_key,
+                api_base=api_base,
+                litellm_params=litellm_params,
+            )
+
+            if result is not None:
+                return TokenCountResponse(
+                    total_tokens=result.get("input_tokens", 0),
+                    request_model=request_model,
+                    model_used=model_to_use,
+                    tokenizer_type="azure_ai_anthropic_api",
+                    original_response=result,
+                )
+        except AnthropicError as e:
+            verbose_logger.warning(
+                f"Azure AI Anthropic CountTokens API error: status={e.status_code}, message={e.message}"
+            )
+            return _error_response(e.message, e.status_code)
+        except Exception as e:
+            verbose_logger.warning(
+                f"Error calling Azure AI Anthropic CountTokens API: {e}"
+            )
+            return _error_response(str(e), 500)
+
+        return None
 
 
 class AzureFoundryModelInfo(BaseLLMModelInfo):
+    """Model info for Azure AI / Azure Foundry models."""
+
+    def __init__(self, model: Optional[str] = None):
+        self._model = model
+
     @staticmethod
     def get_azure_ai_route(model: str) -> Literal["agents", "default"]:
         """
         Get the Azure AI route for the given model.
-        
+
         Similar to BedrockModelInfo.get_bedrock_route().
         """
         if "agents/" in model:
@@ -21,33 +143,40 @@ def get_azure_ai_route(model: str) -> Literal["agents", "default"]:
     @staticmethod
     def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
         return (
-                api_base
-                or litellm.api_base
-                or get_secret_str("AZURE_AI_API_BASE")
+            api_base or litellm.api_base or get_secret_str("AZURE_AI_API_BASE")
         )
-    
+
     @staticmethod
     def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
         return (
-                api_key
-                or litellm.api_key
-                or litellm.openai_key
-                or get_secret_str("AZURE_AI_API_KEY")
-            )
-    
+            api_key
+            or litellm.api_key
+            or litellm.openai_key
+            or get_secret_str("AZURE_AI_API_KEY")
+        )
+
     @property
     def api_version(self, api_version: Optional[str] = None) -> Optional[str]:
         api_version = (
-            api_version
-            or litellm.api_version
-            or get_secret_str("AZURE_API_VERSION")
+            api_version or litellm.api_version or get_secret_str("AZURE_API_VERSION")
         )
         return api_version
-    
+
+    def get_token_counter(self) -> Optional[BaseTokenCounter]:
+        """
+        Build the token counter matching the model this instance was created for.
+
+        Returns:
+            An AzureAIAnthropicTokenCounter when the stored model name contains
+            "claude" (case-insensitive); None when it does not or no model is set.
+        """
+        model_name = self._model
+        is_claude = bool(model_name) and "claude" in model_name.lower()
+        return AzureAIAnthropicTokenCounter() if is_claude else None
+
     #########################################################
     # Not implemented methods
     #########################################################
-        
 
     @staticmethod
     def get_base_model(model: str) -> Optional[str]:
@@ -64,4 +193,6 @@ def validate_environment(
         api_base: Optional[str] = None,
     ) -> dict:
         """Azure Foundry sends api key in query params"""
-        raise NotImplementedError("Azure Foundry does not support environment validation")
+        raise NotImplementedError(
+            "Azure Foundry does not support environment validation"
+        )

From aec8299d8b3e0664aca9323855cf38db4b738104 Mon Sep 17 00:00:00 2001
From: Ishaan Jaffer <ishaanjaffer0324@gmail.com>
Date: Tue, 20 Jan 2026 12:22:02 -0800
Subject: [PATCH 05/13] init test suite for token counter

---
 .../base_token_counter_test.py                | 130 ++++++++++++++++++
 .../test_anthropic_token_counter.py           |  47 +++++++
 .../test_azure_ai_anthropic_token_counter.py  |  53 +++++++
 .../test_bedrock_token_counter.py             | 101 ++++++++++++++
 .../test_proxy_token_counter.py               |  83 ++++++-----
 5 files changed, 380 insertions(+), 34 deletions(-)
 create mode 100644 tests/litellm_utils_tests/base_token_counter_test.py
 create mode 100644 tests/litellm_utils_tests/test_anthropic_token_counter.py
 create mode 100644 tests/litellm_utils_tests/test_azure_ai_anthropic_token_counter.py
 create mode 100644 tests/litellm_utils_tests/test_bedrock_token_counter.py

diff --git a/tests/litellm_utils_tests/base_token_counter_test.py b/tests/litellm_utils_tests/base_token_counter_test.py
new file mode 100644
index 00000000000..b5e87021a0b
--- /dev/null
+++ b/tests/litellm_utils_tests/base_token_counter_test.py
@@ -0,0 +1,130 @@
+"""
+Base Token Counter Test Suite.
+
+This module provides an abstract base test class that enforces common tests
+across all token counter implementations. Similar to base_llm_unit_tests.py
+for LLM chat tests.
+
+Usage:
+    Create a test class that inherits from BaseTokenCounterTest and implement
+    the abstract methods to provide provider-specific configuration.
+"""
+
+import os
+import sys
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional
+
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the directory two levels above the current working directory to the system path
+
+from litellm.llms.base_llm.base_utils import BaseTokenCounter
+from litellm.types.utils import TokenCountResponse
+
+
+class BaseTokenCounterTest(ABC):
+    """
+    Abstract base test class for token counter implementations.
+
+    Subclasses must implement:
+        - get_token_counter(): Returns the token counter instance
+        - get_test_model(): Returns the model name to use for testing
+        - get_test_messages(): Returns test messages for token counting
+        - get_deployment_config(): Returns deployment configuration with credentials
+        - get_custom_llm_provider(): Returns the provider name for should_use_token_counting_api
+    """
+
+    @abstractmethod
+    def get_token_counter(self) -> BaseTokenCounter:
+        """Must return the token counter instance to test."""
+
+    @abstractmethod
+    def get_test_model(self) -> str:
+        """Must return the model name to use for testing."""
+
+    @abstractmethod
+    def get_test_messages(self) -> List[Dict[str, Any]]:
+        """Must return test messages for token counting."""
+
+    @abstractmethod
+    def get_deployment_config(self) -> Dict[str, Any]:
+        """Must return deployment configuration with credentials."""
+
+    @abstractmethod
+    def get_custom_llm_provider(self) -> str:
+        """Must return the provider name for should_use_token_counting_api check."""
+
+    @staticmethod
+    def _skip_if_credential_error(error: Any) -> None:
+        """Skip the current test when ``error`` mentions missing/invalid credentials."""
+        error_str = str(error).lower()
+        if "api key" in error_str or "api_key" in error_str or "unauthorized" in error_str:
+            pytest.skip(f"Missing or invalid credentials: {error}")
+
+    @pytest.mark.asyncio
+    async def test_count_tokens_basic(self):
+        """
+        Test basic token counting functionality.
+
+        Verifies that:
+        - Token counter returns a TokenCountResponse
+        - total_tokens is greater than 0
+        - tokenizer_type is set
+        - No error occurred
+
+        Credential failures (raised or returned as an error response) skip the test.
+        """
+        token_counter = self.get_token_counter()
+        model = self.get_test_model()
+        messages = self.get_test_messages()
+        deployment = self.get_deployment_config()
+
+        # pytest never throws test exceptions into fixtures, so check credentials here.
+        try:
+            result = await token_counter.count_tokens(
+                model_to_use=model,
+                messages=messages,
+                contents=None,
+                deployment=deployment,
+                request_model=model,
+            )
+        except Exception as e:
+            self._skip_if_credential_error(e)
+            raise
+
+        print(f"Token count result: {result}")
+
+        assert result is not None, "Token counter should return a result"
+        assert isinstance(result, TokenCountResponse), "Result should be TokenCountResponse"
+        if result.error:
+            self._skip_if_credential_error(result.error_message)
+        assert result.total_tokens > 0, f"Token count should be > 0, got {result.total_tokens}"
+        assert result.tokenizer_type is not None, "tokenizer_type should be set"
+        assert result.error is not True, f"Token counting should not error: {result.error_message}"
+
+    def test_should_use_token_counting_api(self):
+        """
+        Test that should_use_token_counting_api returns True for the correct provider.
+
+        Verifies that the token counter correctly identifies when it should be used
+        based on the custom_llm_provider.
+        """
+        token_counter = self.get_token_counter()
+        provider = self.get_custom_llm_provider()
+
+        result = token_counter.should_use_token_counting_api(
+            custom_llm_provider=provider
+        )
+
+        assert result is True, f"should_use_token_counting_api should return True for {provider}"
+
+        # Also verify it returns False for other providers
+        other_provider = "some_other_provider_that_doesnt_exist"
+        result_other = token_counter.should_use_token_counting_api(
+            custom_llm_provider=other_provider
+        )
+
+        assert result_other is False, f"should_use_token_counting_api should return False for {other_provider}"
diff --git a/tests/litellm_utils_tests/test_anthropic_token_counter.py b/tests/litellm_utils_tests/test_anthropic_token_counter.py
new file mode 100644
index 00000000000..a1fbcecfdd4
--- /dev/null
+++ b/tests/litellm_utils_tests/test_anthropic_token_counter.py
@@ -0,0 +1,47 @@
+"""
+Anthropic Token Counter Tests.
+
+Tests for the Anthropic token counter implementation using the base test suite.
+"""
+
+import os
+import sys
+from typing import Any, Dict, List
+
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the directory two levels above the current working directory to the system path
+
+from litellm.llms.anthropic.count_tokens import AnthropicTokenCounter
+from litellm.llms.base_llm.base_utils import BaseTokenCounter
+from tests.litellm_utils_tests.base_token_counter_test import BaseTokenCounterTest
+
+
+class TestAnthropicTokenCounter(BaseTokenCounterTest):
+    """Runs the shared token counter test suite against the Anthropic counter."""
+
+    def get_token_counter(self) -> BaseTokenCounter:
+        counter = AnthropicTokenCounter()
+        return counter
+
+    def get_test_model(self) -> str:
+        return "claude-sonnet-4-20250514"
+
+    def get_test_messages(self) -> List[Dict[str, Any]]:
+        user_message = {"role": "user", "content": "Hello, how are you today?"}
+        return [user_message]
+
+    def get_deployment_config(self) -> Dict[str, Any]:
+        # The key comes from the environment; without it the test is skipped.
+        anthropic_key = os.getenv("ANTHROPIC_API_KEY")
+        if not anthropic_key:
+            pytest.skip("ANTHROPIC_API_KEY not set")
+
+        litellm_params = {"api_key": anthropic_key}
+        return {"litellm_params": litellm_params}
+
+    def get_custom_llm_provider(self) -> str:
+        """Provider name for which should_use_token_counting_api must return True."""
+        return "anthropic"
diff --git a/tests/litellm_utils_tests/test_azure_ai_anthropic_token_counter.py b/tests/litellm_utils_tests/test_azure_ai_anthropic_token_counter.py
new file mode 100644
index 00000000000..031502cbece
--- /dev/null
+++ b/tests/litellm_utils_tests/test_azure_ai_anthropic_token_counter.py
@@ -0,0 +1,53 @@
+"""
+Azure AI Anthropic Token Counter Tests.
+
+Tests for the Azure AI Anthropic token counter implementation using the base test suite.
+"""
+
+import os
+import sys
+from typing import Any, Dict, List
+
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the directory two levels above the current working directory to the system path
+
+from litellm.llms.azure_ai.anthropic.count_tokens import AzureAIAnthropicTokenCounter
+from litellm.llms.base_llm.base_utils import BaseTokenCounter
+from tests.litellm_utils_tests.base_token_counter_test import BaseTokenCounterTest
+
+
+class TestAzureAIAnthropicTokenCounter(BaseTokenCounterTest):
+    """Runs the shared token counter test suite against the Azure AI Anthropic counter."""
+
+    def get_token_counter(self) -> BaseTokenCounter:
+        counter = AzureAIAnthropicTokenCounter()
+        return counter
+
+    def get_test_model(self) -> str:
+        return "claude-3-5-sonnet"
+
+    def get_test_messages(self) -> List[Dict[str, Any]]:
+        user_message = {"role": "user", "content": "Hello, how are you today?"}
+        return [user_message]
+
+    def get_deployment_config(self) -> Dict[str, Any]:
+        """
+        Build litellm_params from AZURE_AI_API_KEY / AZURE_AI_API_BASE.
+
+        Skips the test when either variable is unset; the key is checked first.
+        """
+        azure_key = os.getenv("AZURE_AI_API_KEY")
+        azure_base = os.getenv("AZURE_AI_API_BASE")
+
+        if not azure_key:
+            pytest.skip("AZURE_AI_API_KEY not set")
+        if not azure_base:
+            pytest.skip("AZURE_AI_API_BASE not set")
+
+        return {"litellm_params": {"api_key": azure_key, "api_base": azure_base}}
+
+    def get_custom_llm_provider(self) -> str:
+        return "azure_ai"
diff --git a/tests/litellm_utils_tests/test_bedrock_token_counter.py b/tests/litellm_utils_tests/test_bedrock_token_counter.py
new file mode 100644
index 00000000000..f7c29918820
--- /dev/null
+++ b/tests/litellm_utils_tests/test_bedrock_token_counter.py
@@ -0,0 +1,101 @@
+"""
+Bedrock Token Counter Tests.
+
+Tests for the Bedrock token counter implementation using the base test suite.
+
+Note: Not all Bedrock models support token counting. The CountTokens API
+is only available for specific models. If the model doesn't support token
+counting, the test will be skipped.
+"""
+
+import os
+import sys
+from typing import Any, Dict, List
+
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the directory two levels above the current working directory to the system path
+
+from litellm.llms.base_llm.base_utils import BaseTokenCounter
+from litellm.llms.bedrock.count_tokens.bedrock_token_counter import BedrockTokenCounter
+from tests.litellm_utils_tests.base_token_counter_test import BaseTokenCounterTest
+
+
+class TestBedrockTokenCounter(BaseTokenCounterTest):
+    """
+    Runs the shared token counter test suite against the Bedrock counter.
+
+    Note: Bedrock CountTokens API support varies by model. Some models
+    (like older Claude versions) may not support token counting.
+    Use amazon.nova-* models for reliable token counting support.
+    """
+
+    def get_token_counter(self) -> BaseTokenCounter:
+        counter = BedrockTokenCounter()
+        return counter
+
+    def get_test_model(self) -> str:
+        # BEDROCK_TEST_MODEL overrides the default Amazon Nova model.
+        default_model = "amazon.nova-lite-v1:0"
+        return os.getenv("BEDROCK_TEST_MODEL", default_model)
+
+    def get_test_messages(self) -> List[Dict[str, Any]]:
+        user_message = {"role": "user", "content": "Hello, how are you today?"}
+        return [user_message]
+
+    def get_deployment_config(self) -> Dict[str, Any]:
+        # AWS credentials come from the environment; region defaults to us-east-1.
+        access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
+        secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")
+        region_name = os.getenv("AWS_REGION_NAME", "us-east-1")
+
+        if not (access_key_id and secret_access_key):
+            pytest.skip("AWS credentials not set (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)")
+
+        litellm_params = {
+            "aws_access_key_id": access_key_id,
+            "aws_secret_access_key": secret_access_key,
+            "aws_region_name": region_name,
+        }
+        return {"litellm_params": litellm_params}
+
+    def get_custom_llm_provider(self) -> str:
+        return "bedrock"
+
+    @pytest.mark.asyncio
+    async def test_count_tokens_basic(self):
+        """
+        Test basic token counting, skipping models without CountTokens support.
+
+        Overrides the base test: an error response whose message says the model
+        doesn't support counting tokens skips the test instead of failing it.
+        """
+        from litellm.types.utils import TokenCountResponse
+
+        counter = self.get_token_counter()
+        model_name = self.get_test_model()
+        test_messages = self.get_test_messages()
+        deployment_config = self.get_deployment_config()
+
+        response = await counter.count_tokens(
+            model_to_use=model_name,
+            messages=test_messages,
+            contents=None,
+            deployment=deployment_config,
+            request_model=model_name,
+        )
+
+        print(f"Token count result: {response}")
+
+        assert response is not None, "Token counter should return a result"
+        assert isinstance(response, TokenCountResponse), "Result should be TokenCountResponse"
+
+        # An unsupported-model error is a skip, not a failure.
+        if response.error and "doesn't support counting tokens" in str(response.error_message):
+            pytest.skip(f"Model {model_name} doesn't support token counting: {response.error_message}")
+
+        assert response.total_tokens > 0, f"Token count should be > 0, got {response.total_tokens}"
+        assert response.tokenizer_type is not None, "tokenizer_type should be set"
+        assert response.error is not True, f"Token counting should not error: {response.error_message}"
diff --git a/tests/proxy_unit_tests/test_proxy_token_counter.py b/tests/proxy_unit_tests/test_proxy_token_counter.py
index 8e1057bb8e1..4a2ff28cfd3 100644
--- a/tests/proxy_unit_tests/test_proxy_token_counter.py
+++ b/tests/proxy_unit_tests/test_proxy_token_counter.py
@@ -478,18 +478,19 @@ async def mock_read_request_body(request):
 @pytest.mark.asyncio
 async def test_factory_anthropic_endpoint_calls_anthropic_counter():
     """Test that /v1/messages/count_tokens with Anthropic model uses Anthropic counter."""
-    from unittest.mock import patch, AsyncMock
+    from unittest.mock import patch, AsyncMock, MagicMock
     from fastapi.testclient import TestClient
     from litellm.proxy.proxy_server import app
 
-    # Mock the anthropic token counting function
+    # Mock the Anthropic CountTokens handler
     with patch(
-        "litellm.proxy.utils.count_tokens_with_anthropic_api"
-    ) as mock_anthropic_count:
-        mock_anthropic_count.return_value = {
-            "total_tokens": 42,
-            "tokenizer_used": "anthropic",
-        }
+        "litellm.llms.anthropic.common_utils.AnthropicCountTokensHandler"
+    ) as MockHandler:
+        mock_handler_instance = MagicMock()
+        mock_handler_instance.handle_count_tokens_request = AsyncMock(
+            return_value={"input_tokens": 42}
+        )
+        MockHandler.return_value = mock_handler_instance
 
         # Mock router to return Anthropic deployment
         with patch("litellm.proxy.proxy_server.llm_router") as mock_router:
@@ -510,36 +511,44 @@ async def test_factory_anthropic_endpoint_calls_anthropic_counter():
                 }
             )
 
-            client = TestClient(app)
+            # Set ANTHROPIC_API_KEY for the test
+            with patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"}):
+                client = TestClient(app)
 
-            response = client.post(
-                "/v1/messages/count_tokens",
-                json={
-                    "model": "claude-3-5-sonnet",
-                    "messages": [{"role": "user", "content": "Hello"}],
-                },
-                headers={"Authorization": "Bearer test-key"},
-            )
+                response = client.post(
+                    "/v1/messages/count_tokens",
+                    json={
+                        "model": "claude-3-5-sonnet",
+                        "messages": [{"role": "user", "content": "Hello"}],
+                    },
+                    headers={"Authorization": "Bearer test-key"},
+                )
 
-            assert response.status_code == 200
-            data = response.json()
-            assert data["input_tokens"] == 42
+                assert response.status_code == 200
+                data = response.json()
+                assert data["input_tokens"] == 42
 
-            # Verify that Anthropic API was called
-            mock_anthropic_count.assert_called_once()
+                # Verify that Anthropic handler was called
+                mock_handler_instance.handle_count_tokens_request.assert_called_once()
 
 
 @pytest.mark.asyncio
 async def test_factory_gpt4_endpoint_does_not_call_anthropic_counter():
     """Test that /v1/messages/count_tokens with GPT-4 does NOT use Anthropic counter."""
-    from unittest.mock import patch, AsyncMock
+    from unittest.mock import patch, AsyncMock, MagicMock
     from fastapi.testclient import TestClient
     from litellm.proxy.proxy_server import app
 
-    # Mock the anthropic token counting function
+    # Mock the Anthropic CountTokens handler
     with patch(
-        "litellm.proxy.utils.count_tokens_with_anthropic_api"
-    ) as mock_anthropic_count:
+        "litellm.llms.anthropic.common_utils.AnthropicCountTokensHandler"
+    ) as MockHandler:
+        mock_handler_instance = MagicMock()
+        mock_handler_instance.handle_count_tokens_request = AsyncMock(
+            return_value={"input_tokens": 42}
+        )
+        MockHandler.return_value = mock_handler_instance
+
         # Mock litellm token counter
         with patch("litellm.token_counter") as mock_litellm_counter:
             mock_litellm_counter.return_value = 50
@@ -578,21 +587,27 @@ async def test_factory_gpt4_endpoint_does_not_call_anthropic_counter():
                 data = response.json()
                 assert data["input_tokens"] == 50
 
-                # Verify that Anthropic API was NOT called
-                mock_anthropic_count.assert_not_called()
+                # Verify that Anthropic handler was NOT called
+                mock_handler_instance.handle_count_tokens_request.assert_not_called()
 
 
 @pytest.mark.asyncio
 async def test_factory_normal_token_counter_endpoint_does_not_call_anthropic():
     """Test that /utils/token_counter does NOT use Anthropic counter even with Anthropic model."""
-    from unittest.mock import patch, AsyncMock
+    from unittest.mock import patch, AsyncMock, MagicMock
     from fastapi.testclient import TestClient
     from litellm.proxy.proxy_server import app
 
-    # Mock the anthropic token counting function
+    # Mock the Anthropic CountTokens handler
     with patch(
-        "litellm.proxy.utils.count_tokens_with_anthropic_api"
-    ) as mock_anthropic_count:
+        "litellm.llms.anthropic.common_utils.AnthropicCountTokensHandler"
+    ) as MockHandler:
+        mock_handler_instance = MagicMock()
+        mock_handler_instance.handle_count_tokens_request = AsyncMock(
+            return_value={"input_tokens": 42}
+        )
+        MockHandler.return_value = mock_handler_instance
+
         # Mock litellm token counter
         with patch("litellm.token_counter") as mock_litellm_counter:
             mock_litellm_counter.return_value = 35
@@ -635,8 +650,8 @@ async def test_factory_normal_token_counter_endpoint_does_not_call_anthropic():
                 data = response.json()
                 assert data["total_tokens"] == 35
 
-                # Verify that Anthropic API was NOT called (since call_endpoint=False)
-                mock_anthropic_count.assert_not_called()
+                # Verify that Anthropic handler was NOT called (since call_endpoint=False)
+                mock_handler_instance.handle_count_tokens_request.assert_not_called()
 
 
 @pytest.mark.asyncio

From 2e690fce3fd80a85c8312f00c582a46132890ee8 Mon Sep 17 00:00:00 2001
From: Ishaan Jaffer <ishaanjaffer0324@gmail.com>
Date: Tue, 20 Jan 2026 12:23:47 -0800
Subject: [PATCH 06/13] init token counters

---
 .../llms/anthropic/count_tokens/__init__.py   |  15 ++
 .../llms/anthropic/count_tokens/handler.py    | 126 +++++++++++++++++
 .../anthropic/count_tokens/token_counter.py   | 104 ++++++++++++++
 .../anthropic/count_tokens/transformation.py  | 121 ++++++++++++++++
 .../anthropic/count_tokens/__init__.py        |  19 +++
 .../anthropic/count_tokens/handler.py         | 131 ++++++++++++++++++
 .../anthropic/count_tokens/token_counter.py   | 119 ++++++++++++++++
 .../anthropic/count_tokens/transformation.py  |  88 ++++++++++++
 litellm/llms/azure_ai/common_utils.py         | 125 +----------------
 9 files changed, 728 insertions(+), 120 deletions(-)
 create mode 100644 litellm/llms/anthropic/count_tokens/__init__.py
 create mode 100644 litellm/llms/anthropic/count_tokens/handler.py
 create mode 100644 litellm/llms/anthropic/count_tokens/token_counter.py
 create mode 100644 litellm/llms/anthropic/count_tokens/transformation.py
 create mode 100644 litellm/llms/azure_ai/anthropic/count_tokens/__init__.py
 create mode 100644 litellm/llms/azure_ai/anthropic/count_tokens/handler.py
 create mode 100644 litellm/llms/azure_ai/anthropic/count_tokens/token_counter.py
 create mode 100644 litellm/llms/azure_ai/anthropic/count_tokens/transformation.py

diff --git a/litellm/llms/anthropic/count_tokens/__init__.py b/litellm/llms/anthropic/count_tokens/__init__.py
new file mode 100644
index 00000000000..ef46862bda6
--- /dev/null
+++ b/litellm/llms/anthropic/count_tokens/__init__.py
@@ -0,0 +1,15 @@
+"""
+Anthropic CountTokens API implementation.
+"""
+
+from litellm.llms.anthropic.count_tokens.handler import AnthropicCountTokensHandler
+from litellm.llms.anthropic.count_tokens.token_counter import AnthropicTokenCounter
+from litellm.llms.anthropic.count_tokens.transformation import (
+    AnthropicCountTokensConfig,
+)
+
+__all__ = [
+    "AnthropicCountTokensHandler",
+    "AnthropicCountTokensConfig",
+    "AnthropicTokenCounter",
+]
diff --git a/litellm/llms/anthropic/count_tokens/handler.py b/litellm/llms/anthropic/count_tokens/handler.py
new file mode 100644
index 00000000000..422c6f0e559
--- /dev/null
+++ b/litellm/llms/anthropic/count_tokens/handler.py
@@ -0,0 +1,126 @@
+"""
+Anthropic CountTokens API handler.
+
+Uses httpx for HTTP requests instead of the Anthropic SDK.
+"""
+
+from typing import Any, Dict, List, Optional, Union
+
+import httpx
+
+import litellm
+from litellm._logging import verbose_logger
+from litellm.llms.anthropic.common_utils import AnthropicError
+from litellm.llms.anthropic.count_tokens.transformation import (
+    AnthropicCountTokensConfig,
+)
+from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
+
+
+class AnthropicCountTokensHandler(AnthropicCountTokensConfig):
+    """
+    Handler for Anthropic CountTokens API requests.
+
+    Uses httpx for HTTP requests, following the same pattern as BedrockCountTokensHandler.
+    """
+
+    async def handle_count_tokens_request(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        api_key: str,
+        api_base: Optional[str] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+    ) -> Dict[str, Any]:
+        """
+        Validate, send and post-process one CountTokens request using httpx.
+
+        Args:
+            model: The model identifier (e.g., "claude-3-5-sonnet-20241022")
+            messages: The messages to count tokens for
+            api_key: The Anthropic API key
+            api_base: Optional full endpoint URL, used verbatim instead of the default endpoint
+            timeout: Optional timeout for the request (defaults to litellm.request_timeout)
+
+        Returns:
+            Dictionary of the form {"input_tokens": <int>} (see transform_response)
+
+        Raises:
+            AnthropicError: If validation, the HTTP call or response handling fails
+        """
+        try:
+            # Validate the request
+            self.validate_request(model, messages)
+
+            verbose_logger.debug(
+                f"Processing Anthropic CountTokens request for model: {model}"
+            )
+
+            # Transform request to Anthropic format
+            request_body = self.transform_request_to_count_tokens(
+                model=model,
+                messages=messages,
+            )
+
+            verbose_logger.debug(f"Transformed request: {request_body}")
+
+            # api_base, when given, replaces the whole endpoint URL (no path is appended)
+            endpoint_url = api_base or self.get_anthropic_count_tokens_endpoint()
+
+            verbose_logger.debug(f"Making request to: {endpoint_url}")
+
+            # Get required headers
+            headers = self.get_required_headers(api_key)
+
+            # Use LiteLLM's async httpx client
+            async_client = get_async_httpx_client(
+                llm_provider=litellm.LlmProviders.ANTHROPIC
+            )
+
+            # Use provided timeout or fall back to litellm.request_timeout
+            request_timeout = timeout if timeout is not None else litellm.request_timeout
+
+            response = await async_client.post(
+                endpoint_url,
+                headers=headers,
+                json=request_body,
+                timeout=request_timeout,
+            )
+
+            verbose_logger.debug(f"Response status: {response.status_code}")
+
+            if response.status_code != 200:
+                error_text = response.text
+                verbose_logger.error(f"Anthropic API error: {error_text}")
+                raise AnthropicError(
+                    status_code=response.status_code,
+                    message=error_text,
+                )
+
+            anthropic_response = response.json()
+
+            verbose_logger.debug(f"Anthropic response: {anthropic_response}")
+
+            # Transform response
+            final_response = self.transform_response(anthropic_response)
+
+            verbose_logger.debug(f"Final response: {final_response}")
+
+            return final_response
+
+        except AnthropicError:
+            # Re-raise Anthropic exceptions as-is
+            raise
+        except httpx.HTTPStatusError as e:
+            # HTTP errors - preserve the actual status code and chain the cause
+            verbose_logger.error(f"HTTP error in CountTokens handler: {str(e)}")
+            raise AnthropicError(
+                status_code=e.response.status_code,
+                message=e.response.text,
+            ) from e
+        except Exception as e:
+            verbose_logger.error(f"Error in CountTokens handler: {str(e)}")
+            raise AnthropicError(
+                status_code=500,
+                message=f"CountTokens processing error: {str(e)}",
+            ) from e
diff --git a/litellm/llms/anthropic/count_tokens/token_counter.py b/litellm/llms/anthropic/count_tokens/token_counter.py
new file mode 100644
index 00000000000..266b2794fc3
--- /dev/null
+++ b/litellm/llms/anthropic/count_tokens/token_counter.py
@@ -0,0 +1,104 @@
+"""
+Anthropic Token Counter implementation using the CountTokens API.
+"""
+
+import os
+from typing import Any, Dict, List, Optional
+
+from litellm._logging import verbose_logger
+from litellm.llms.anthropic.count_tokens.handler import AnthropicCountTokensHandler
+from litellm.llms.base_llm.base_utils import BaseTokenCounter
+from litellm.types.utils import LlmProviders, TokenCountResponse
+
+# Global handler instance - reuse across all token counting requests
+anthropic_count_tokens_handler = AnthropicCountTokensHandler()
+
+
+class AnthropicTokenCounter(BaseTokenCounter):
+    """Token counter implementation for Anthropic provider using the CountTokens API."""
+
+    def should_use_token_counting_api(
+        self,
+        custom_llm_provider: Optional[str] = None,
+    ) -> bool:
+        return custom_llm_provider == LlmProviders.ANTHROPIC.value  # exact provider match only
+
+    async def count_tokens(
+        self,
+        model_to_use: str,
+        messages: Optional[List[Dict[str, Any]]],
+        contents: Optional[List[Dict[str, Any]]],
+        deployment: Optional[Dict[str, Any]] = None,
+        request_model: str = "",
+    ) -> Optional[TokenCountResponse]:
+        """
+        Count tokens using Anthropic's CountTokens API.
+
+        Args:
+            model_to_use: The model identifier
+            messages: The messages to count tokens for
+            contents: Alternative content format (not used for Anthropic)
+            deployment: Deployment configuration containing litellm_params
+            request_model: The original request model name
+
+        Returns:
+            TokenCountResponse (error=True if the API call fails); None if messages or API key is missing
+        """
+        from litellm.llms.anthropic.common_utils import AnthropicError
+
+        if not messages:
+            return None
+
+        deployment = deployment or {}
+        litellm_params = deployment.get("litellm_params", {})
+
+        # Deployment-level api_key wins; ANTHROPIC_API_KEY is only the fallback
+        api_key = litellm_params.get("api_key")
+        if not api_key:
+            api_key = os.getenv("ANTHROPIC_API_KEY")
+
+        if not api_key:
+            verbose_logger.warning("No Anthropic API key found for token counting")
+            return None
+
+        try:
+            result = await anthropic_count_tokens_handler.handle_count_tokens_request(
+                model=model_to_use,
+                messages=messages,
+                api_key=api_key,
+            )
+
+            if result is not None:
+                return TokenCountResponse(
+                    total_tokens=result.get("input_tokens", 0),
+                    request_model=request_model,
+                    model_used=model_to_use,
+                    tokenizer_type="anthropic_api",
+                    original_response=result,
+                )
+        except AnthropicError as e:
+            verbose_logger.warning(
+                f"Anthropic CountTokens API error: status={e.status_code}, message={e.message}"
+            )
+            return TokenCountResponse(
+                total_tokens=0,
+                request_model=request_model,
+                model_used=model_to_use,
+                tokenizer_type="anthropic_api",
+                error=True,
+                error_message=e.message,
+                status_code=e.status_code,
+            )
+        except Exception as e:
+            verbose_logger.warning(f"Error calling Anthropic CountTokens API: {e}")
+            return TokenCountResponse(
+                total_tokens=0,
+                request_model=request_model,
+                model_used=model_to_use,
+                tokenizer_type="anthropic_api",
+                error=True,
+                error_message=str(e),
+                status_code=500,
+            )
+
+        return None  # reached only when the handler returned None without raising
diff --git a/litellm/llms/anthropic/count_tokens/transformation.py b/litellm/llms/anthropic/count_tokens/transformation.py
new file mode 100644
index 00000000000..beffd3beeec
--- /dev/null
+++ b/litellm/llms/anthropic/count_tokens/transformation.py
@@ -0,0 +1,121 @@
+"""
+Anthropic CountTokens API transformation logic.
+
+This module handles the transformation of requests to Anthropic's CountTokens API format.
+"""
+
+from typing import Any, Dict, List
+
+from litellm.constants import ANTHROPIC_TOKEN_COUNTING_BETA_VERSION
+
+
+class AnthropicCountTokensConfig:
+    """
+    Configuration and transformation logic for Anthropic CountTokens API.
+
+    Anthropic CountTokens API Specification:
+    - Endpoint: POST https://api.anthropic.com/v1/messages/count_tokens
+    - Beta header sent: anthropic-beta: ANTHROPIC_TOKEN_COUNTING_BETA_VERSION (e.g. token-counting-2024-11-01)
+    - Response: {"input_tokens": <number>}
+    """
+
+    def get_anthropic_count_tokens_endpoint(self) -> str:
+        """
+        Get the default (hard-coded) Anthropic CountTokens API endpoint.
+
+        Returns:
+            The endpoint URL for the CountTokens API
+        """
+        return "https://api.anthropic.com/v1/messages/count_tokens"
+
+    def transform_request_to_count_tokens(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+    ) -> Dict[str, Any]:
+        """
+        Build the CountTokens request body; model and messages are passed through unchanged.
+
+        Input:
+        {
+            "model": "claude-3-5-sonnet-20241022",
+            "messages": [{"role": "user", "content": "Hello!"}]
+        }
+
+        Output (Anthropic CountTokens format):
+        {
+            "model": "claude-3-5-sonnet-20241022",
+            "messages": [{"role": "user", "content": "Hello!"}]
+        }
+        """
+        return {
+            "model": model,
+            "messages": messages,
+        }
+
+    def transform_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Reduce an Anthropic CountTokens response to its input_tokens field (0 if absent).
+
+        Input (Anthropic response):
+        {
+            "input_tokens": 123
+        }
+
+        Output:
+        {
+            "input_tokens": 123
+        }
+        """
+        return {
+            "input_tokens": response.get("input_tokens", 0),
+        }
+
+    def get_required_headers(self, api_key: str) -> Dict[str, str]:
+        """
+        Get the required headers for the CountTokens API.
+
+        Args:
+            api_key: The Anthropic API key (sent as the x-api-key header)
+
+        Returns:
+            Dictionary of required headers
+        """
+        return {
+            "Content-Type": "application/json",
+            "x-api-key": api_key,
+            "anthropic-version": "2023-06-01",
+            "anthropic-beta": ANTHROPIC_TOKEN_COUNTING_BETA_VERSION,
+        }
+
+    def validate_request(
+        self, model: str, messages: List[Dict[str, Any]]
+    ) -> None:
+        """
+        Validate the incoming count tokens request.
+
+        Args:
+            model: The model name
+            messages: The messages to count tokens for
+
+        Raises:
+            ValueError: If model/messages is empty, messages is not a list, or an item is not a dict with role and content
+        """
+        if not model:
+            raise ValueError("model parameter is required")
+
+        if not messages:
+            raise ValueError("messages parameter is required")
+
+        if not isinstance(messages, list):
+            raise ValueError("messages must be a list")
+
+        for i, message in enumerate(messages):
+            if not isinstance(message, dict):
+                raise ValueError(f"Message {i} must be a dictionary")
+
+            if "role" not in message:
+                raise ValueError(f"Message {i} must have a 'role' field")
+
+            if "content" not in message:
+                raise ValueError(f"Message {i} must have a 'content' field")
diff --git a/litellm/llms/azure_ai/anthropic/count_tokens/__init__.py b/litellm/llms/azure_ai/anthropic/count_tokens/__init__.py
new file mode 100644
index 00000000000..9605d401f8e
--- /dev/null
+++ b/litellm/llms/azure_ai/anthropic/count_tokens/__init__.py
@@ -0,0 +1,19 @@
+"""
+Azure AI Anthropic CountTokens API implementation.
+"""
+
+from litellm.llms.azure_ai.anthropic.count_tokens.handler import (
+    AzureAIAnthropicCountTokensHandler,
+)
+from litellm.llms.azure_ai.anthropic.count_tokens.token_counter import (
+    AzureAIAnthropicTokenCounter,
+)
+from litellm.llms.azure_ai.anthropic.count_tokens.transformation import (
+    AzureAIAnthropicCountTokensConfig,
+)
+
+__all__ = [
+    "AzureAIAnthropicCountTokensHandler",
+    "AzureAIAnthropicCountTokensConfig",
+    "AzureAIAnthropicTokenCounter",
+]
diff --git a/litellm/llms/azure_ai/anthropic/count_tokens/handler.py b/litellm/llms/azure_ai/anthropic/count_tokens/handler.py
new file mode 100644
index 00000000000..099f92144ee
--- /dev/null
+++ b/litellm/llms/azure_ai/anthropic/count_tokens/handler.py
@@ -0,0 +1,131 @@
+"""
+Azure AI Anthropic CountTokens API handler.
+
+Uses httpx for HTTP requests with Azure authentication.
+"""
+
+from typing import Any, Dict, List, Optional, Union
+
+import httpx
+
+import litellm
+from litellm._logging import verbose_logger
+from litellm.llms.anthropic.common_utils import AnthropicError
+from litellm.llms.azure_ai.anthropic.count_tokens.transformation import (
+    AzureAIAnthropicCountTokensConfig,
+)
+from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
+
+
+class AzureAIAnthropicCountTokensHandler(AzureAIAnthropicCountTokensConfig):
+    """
+    Handler for Azure AI Anthropic CountTokens API requests.
+
+    Uses httpx for HTTP requests with Azure authentication.
+    """
+
+    async def handle_count_tokens_request(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        api_key: str,
+        api_base: str,
+        litellm_params: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+    ) -> Dict[str, Any]:
+        """
+        Validate, send and post-process one CountTokens request with Azure authentication.
+
+        Args:
+            model: The model identifier (e.g., "claude-3-5-sonnet")
+            messages: The messages to count tokens for
+            api_key: The Azure AI API key
+            api_base: The Azure AI API base URL (the count_tokens path is derived from it)
+            litellm_params: Optional LiteLLM parameters, forwarded to get_required_headers
+            timeout: Optional timeout for the request (defaults to litellm.request_timeout)
+
+        Returns:
+            Dictionary of the form {"input_tokens": <int>} (see transform_response)
+
+        Raises:
+            AnthropicError: If validation, the HTTP call or response handling fails
+        """
+        try:
+            # Validate the request
+            self.validate_request(model, messages)
+
+            verbose_logger.debug(
+                f"Processing Azure AI Anthropic CountTokens request for model: {model}"
+            )
+
+            # Transform request to Anthropic format
+            request_body = self.transform_request_to_count_tokens(
+                model=model,
+                messages=messages,
+            )
+
+            verbose_logger.debug(f"Transformed request: {request_body}")
+
+            # Get endpoint URL
+            endpoint_url = self.get_count_tokens_endpoint(api_base)
+
+            verbose_logger.debug(f"Making request to: {endpoint_url}")
+
+            # Get required headers with Azure authentication
+            headers = self.get_required_headers(
+                api_key=api_key,
+                litellm_params=litellm_params,
+            )
+
+            # Use LiteLLM's async httpx client
+            async_client = get_async_httpx_client(
+                llm_provider=litellm.LlmProviders.AZURE_AI
+            )
+
+            # Use provided timeout or fall back to litellm.request_timeout
+            request_timeout = timeout if timeout is not None else litellm.request_timeout
+
+            response = await async_client.post(
+                endpoint_url,
+                headers=headers,
+                json=request_body,
+                timeout=request_timeout,
+            )
+
+            verbose_logger.debug(f"Response status: {response.status_code}")
+
+            if response.status_code != 200:
+                error_text = response.text
+                verbose_logger.error(f"Azure AI Anthropic API error: {error_text}")
+                raise AnthropicError(
+                    status_code=response.status_code,
+                    message=error_text,
+                )
+
+            azure_response = response.json()
+
+            verbose_logger.debug(f"Azure AI Anthropic response: {azure_response}")
+
+            # Transform response
+            final_response = self.transform_response(azure_response)
+
+            verbose_logger.debug(f"Final response: {final_response}")
+
+            return final_response
+
+        except AnthropicError:
+            # Re-raise Anthropic exceptions as-is
+            raise
+        except httpx.HTTPStatusError as e:
+            # HTTP errors - preserve the actual status code and chain the cause
+            verbose_logger.error(f"HTTP error in CountTokens handler: {str(e)}")
+            raise AnthropicError(
+                status_code=e.response.status_code,
+                message=e.response.text,
+            ) from e
+        except Exception as e:
+            verbose_logger.error(f"Error in CountTokens handler: {str(e)}")
+            raise AnthropicError(
+                status_code=500,
+                message=f"CountTokens processing error: {str(e)}",
+            ) from e
diff --git a/litellm/llms/azure_ai/anthropic/count_tokens/token_counter.py b/litellm/llms/azure_ai/anthropic/count_tokens/token_counter.py
new file mode 100644
index 00000000000..14f92800079
--- /dev/null
+++ b/litellm/llms/azure_ai/anthropic/count_tokens/token_counter.py
@@ -0,0 +1,119 @@
+"""
+Azure AI Anthropic Token Counter implementation using the CountTokens API.
+"""
+
+import os
+from typing import Any, Dict, List, Optional
+
+from litellm._logging import verbose_logger
+from litellm.llms.azure_ai.anthropic.count_tokens.handler import (
+    AzureAIAnthropicCountTokensHandler,
+)
+from litellm.llms.base_llm.base_utils import BaseTokenCounter
+from litellm.types.utils import LlmProviders, TokenCountResponse
+
+# Global handler instance - reuse across all token counting requests
+azure_ai_anthropic_count_tokens_handler = AzureAIAnthropicCountTokensHandler()
+
+
+class AzureAIAnthropicTokenCounter(BaseTokenCounter):
+    """Token counter implementation for Azure AI Anthropic provider using the CountTokens API."""
+
+    def should_use_token_counting_api(
+        self,
+        custom_llm_provider: Optional[str] = None,
+    ) -> bool:
+        return custom_llm_provider == LlmProviders.AZURE_AI.value  # exact provider match only
+
+    async def count_tokens(
+        self,
+        model_to_use: str,
+        messages: Optional[List[Dict[str, Any]]],
+        contents: Optional[List[Dict[str, Any]]],
+        deployment: Optional[Dict[str, Any]] = None,
+        request_model: str = "",
+    ) -> Optional[TokenCountResponse]:
+        """
+        Count tokens using Azure AI Anthropic's CountTokens API.
+
+        Args:
+            model_to_use: The model identifier
+            messages: The messages to count tokens for
+            contents: Alternative content format (not used for Anthropic)
+            deployment: Deployment configuration containing litellm_params
+            request_model: The original request model name
+
+        Returns:
+            TokenCountResponse (error=True if the API call fails); None if messages, API key or API base is missing
+        """
+        from litellm.llms.anthropic.common_utils import AnthropicError
+
+        if not messages:
+            return None
+
+        deployment = deployment or {}
+        litellm_params = deployment.get("litellm_params", {})
+
+        # Deployment-level api_key wins; AZURE_AI_API_KEY is only the fallback
+        api_key = litellm_params.get("api_key")
+        if not api_key:
+            api_key = os.getenv("AZURE_AI_API_KEY")
+
+        # Deployment-level api_base wins; AZURE_AI_API_BASE is only the fallback
+        api_base = litellm_params.get("api_base")
+        if not api_base:
+            api_base = os.getenv("AZURE_AI_API_BASE")
+
+        if not api_key:
+            verbose_logger.warning("No Azure AI API key found for token counting")
+            return None
+
+        if not api_base:
+            verbose_logger.warning("No Azure AI API base found for token counting")
+            return None
+
+        try:
+            result = await azure_ai_anthropic_count_tokens_handler.handle_count_tokens_request(
+                model=model_to_use,
+                messages=messages,
+                api_key=api_key,
+                api_base=api_base,
+                litellm_params=litellm_params,
+            )
+
+            if result is not None:
+                return TokenCountResponse(
+                    total_tokens=result.get("input_tokens", 0),
+                    request_model=request_model,
+                    model_used=model_to_use,
+                    tokenizer_type="azure_ai_anthropic_api",
+                    original_response=result,
+                )
+        except AnthropicError as e:
+            verbose_logger.warning(
+                f"Azure AI Anthropic CountTokens API error: status={e.status_code}, message={e.message}"
+            )
+            return TokenCountResponse(
+                total_tokens=0,
+                request_model=request_model,
+                model_used=model_to_use,
+                tokenizer_type="azure_ai_anthropic_api",
+                error=True,
+                error_message=e.message,
+                status_code=e.status_code,
+            )
+        except Exception as e:
+            verbose_logger.warning(
+                f"Error calling Azure AI Anthropic CountTokens API: {e}"
+            )
+            return TokenCountResponse(
+                total_tokens=0,
+                request_model=request_model,
+                model_used=model_to_use,
+                tokenizer_type="azure_ai_anthropic_api",
+                error=True,
+                error_message=str(e),
+                status_code=500,
+            )
+
+        return None  # reached only when the handler returned None without raising
diff --git a/litellm/llms/azure_ai/anthropic/count_tokens/transformation.py b/litellm/llms/azure_ai/anthropic/count_tokens/transformation.py
new file mode 100644
index 00000000000..e284595cc8a
--- /dev/null
+++ b/litellm/llms/azure_ai/anthropic/count_tokens/transformation.py
@@ -0,0 +1,88 @@
+"""
+Azure AI Anthropic CountTokens API transformation logic.
+
+Extends the base Anthropic CountTokens transformation with Azure authentication.
+"""
+
+from typing import Any, Dict, Optional
+
+from litellm.constants import ANTHROPIC_TOKEN_COUNTING_BETA_VERSION
+from litellm.llms.anthropic.count_tokens.transformation import (
+    AnthropicCountTokensConfig,
+)
+from litellm.llms.azure.common_utils import BaseAzureLLM
+from litellm.types.router import GenericLiteLLMParams
+
+
+class AzureAIAnthropicCountTokensConfig(AnthropicCountTokensConfig):
+    """
+    Configuration and transformation logic for Azure AI Anthropic CountTokens API.
+
+    Extends AnthropicCountTokensConfig with Azure authentication.
+    Azure AI Anthropic uses the same endpoint format but with Azure auth headers.
+    """
+
+    def get_required_headers(
+        self,
+        api_key: str,
+        litellm_params: Optional[Dict[str, Any]] = None,
+    ) -> Dict[str, str]:
+        """
+        Get the required headers for the Azure AI Anthropic CountTokens API.
+
+        Auth headers come from BaseAzureLLM._base_validate_azure_environment, not x-api-key.
+
+        Args:
+            api_key: Azure AI API key, used when litellm_params has no usable api_key
+            litellm_params: Optional LiteLLM parameters for additional auth config (never mutated)
+
+        Returns:
+            Dictionary of required headers with Azure authentication
+        """
+        # Start with base headers
+        headers = {
+            "Content-Type": "application/json",
+            "anthropic-version": "2023-06-01",
+            "anthropic-beta": ANTHROPIC_TOKEN_COUNTING_BETA_VERSION,
+        }
+
+        # Copy first: the caller's dict is shared deployment config and must not gain a key
+        litellm_params = dict(litellm_params or {})
+        if not litellm_params.get("api_key"):
+            litellm_params["api_key"] = api_key
+
+        litellm_params_obj = GenericLiteLLMParams(**litellm_params)
+
+        # Get Azure auth headers
+        azure_headers = BaseAzureLLM._base_validate_azure_environment(
+            headers={}, litellm_params=litellm_params_obj
+        )
+
+        # Merge Azure auth headers
+        headers.update(azure_headers)
+
+        return headers
+
+    def get_count_tokens_endpoint(self, api_base: str) -> str:
+        """
+        Get the Azure AI Anthropic CountTokens API endpoint.
+
+        Args:
+            api_base: The Azure AI API base URL
+                      (e.g., https://my-resource.services.ai.azure.com or
+                       https://my-resource.services.ai.azure.com/anthropic)
+
+        Returns:
+            The endpoint URL for the CountTokens API
+        """
+        # Azure AI Anthropic endpoint format:
+        # https://<resource>.services.ai.azure.com/anthropic/v1/messages/count_tokens
+        api_base = api_base.rstrip("/")
+
+        # Ensure the URL has /anthropic path; a base that already contains
+        # "/anthropic" anywhere is left untouched.
+        if "/anthropic" not in api_base:
+            api_base = f"{api_base}/anthropic"
+
+        # Add the count_tokens path
+        return f"{api_base}/v1/messages/count_tokens"
diff --git a/litellm/llms/azure_ai/common_utils.py b/litellm/llms/azure_ai/common_utils.py
index 1a88926ae39..281e0579c77 100644
--- a/litellm/llms/azure_ai/common_utils.py
+++ b/litellm/llms/azure_ai/common_utils.py
@@ -4,123 +4,6 @@
 from litellm.llms.base_llm.base_utils import BaseLLMModelInfo, BaseTokenCounter
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import AllMessageValues
-from litellm.types.utils import TokenCountResponse
-
-
-class AzureAIAnthropicTokenCounter(BaseTokenCounter):
-    """Token counter implementation for Azure AI Anthropic provider using the CountTokens API."""
-
-    def should_use_token_counting_api(
-        self,
-        custom_llm_provider: Optional[str] = None,
-    ) -> bool:
-        from litellm.types.utils import LlmProviders
-
-        return custom_llm_provider == LlmProviders.AZURE_AI.value
-
-    async def count_tokens(
-        self,
-        model_to_use: str,
-        messages: Optional[List[Dict[str, Any]]],
-        contents: Optional[List[Dict[str, Any]]],
-        deployment: Optional[Dict[str, Any]] = None,
-        request_model: str = "",
-    ) -> Optional[TokenCountResponse]:
-        """
-        Count tokens using Azure AI Anthropic's CountTokens API.
-
-        Args:
-            model_to_use: The model identifier
-            messages: The messages to count tokens for
-            contents: Alternative content format (not used for Anthropic)
-            deployment: Deployment configuration containing litellm_params
-            request_model: The original request model name
-
-        Returns:
-            TokenCountResponse with token count, or None if counting fails
-        """
-        import os
-
-        from litellm._logging import verbose_logger
-        from litellm.llms.anthropic.common_utils import AnthropicError
-        from litellm.llms.azure_ai.anthropic.count_tokens.handler import (
-            AzureAIAnthropicCountTokensHandler,
-        )
-
-        if not messages:
-            return None
-
-        deployment = deployment or {}
-        litellm_params = deployment.get("litellm_params", {})
-
-        # Get Azure AI API key from deployment config or environment
-        api_key = litellm_params.get("api_key")
-        if not api_key:
-            api_key = os.getenv("AZURE_AI_API_KEY")
-
-        # Get API base from deployment config or environment
-        api_base = litellm_params.get("api_base")
-        if not api_base:
-            api_base = os.getenv("AZURE_AI_API_BASE")
-
-        if not api_key:
-            verbose_logger.warning(
-                "No Azure AI API key found for token counting"
-            )
-            return None
-
-        if not api_base:
-            verbose_logger.warning(
-                "No Azure AI API base found for token counting"
-            )
-            return None
-
-        try:
-            handler = AzureAIAnthropicCountTokensHandler()
-            result = await handler.handle_count_tokens_request(
-                model=model_to_use,
-                messages=messages,
-                api_key=api_key,
-                api_base=api_base,
-                litellm_params=litellm_params,
-            )
-
-            if result is not None:
-                return TokenCountResponse(
-                    total_tokens=result.get("input_tokens", 0),
-                    request_model=request_model,
-                    model_used=model_to_use,
-                    tokenizer_type="azure_ai_anthropic_api",
-                    original_response=result,
-                )
-        except AnthropicError as e:
-            verbose_logger.warning(
-                f"Azure AI Anthropic CountTokens API error: status={e.status_code}, message={e.message}"
-            )
-            return TokenCountResponse(
-                total_tokens=0,
-                request_model=request_model,
-                model_used=model_to_use,
-                tokenizer_type="azure_ai_anthropic_api",
-                error=True,
-                error_message=e.message,
-                status_code=e.status_code,
-            )
-        except Exception as e:
-            verbose_logger.warning(
-                f"Error calling Azure AI Anthropic CountTokens API: {e}"
-            )
-            return TokenCountResponse(
-                total_tokens=0,
-                request_model=request_model,
-                model_used=model_to_use,
-                tokenizer_type="azure_ai_anthropic_api",
-                error=True,
-                error_message=str(e),
-                status_code=500,
-            )
-
-        return None
 
 
 class AzureFoundryModelInfo(BaseLLMModelInfo):
@@ -142,9 +25,7 @@ def get_azure_ai_route(model: str) -> Literal["agents", "default"]:
 
     @staticmethod
     def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
-        return (
-            api_base or litellm.api_base or get_secret_str("AZURE_AI_API_BASE")
-        )
+        return api_base or litellm.api_base or get_secret_str("AZURE_AI_API_BASE")
 
     @staticmethod
     def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
@@ -171,6 +52,10 @@ def get_token_counter(self) -> Optional[BaseTokenCounter]:
         """
         # Only return token counter for Claude models
         if self._model and "claude" in self._model.lower():
+            from litellm.llms.azure_ai.anthropic.count_tokens.token_counter import (
+                AzureAIAnthropicTokenCounter,
+            )
+
             return AzureAIAnthropicTokenCounter()
         return None
 

From f8ee2f1a0334e5779957b8211125f258dd92a589 Mon Sep 17 00:00:00 2001
From: Ishaan Jaffer <ishaanjaffer0324@gmail.com>
Date: Tue, 20 Jan 2026 12:44:29 -0800
Subject: [PATCH 07/13] fix: fix pyright

---
 litellm/llms/azure_ai/common_utils.py | 11 +++++++++++
 litellm/utils.py                      |  4 ++++
 2 files changed, 15 insertions(+)

diff --git a/litellm/llms/azure_ai/common_utils.py b/litellm/llms/azure_ai/common_utils.py
index 281e0579c77..2c711c36a0f 100644
--- a/litellm/llms/azure_ai/common_utils.py
+++ b/litellm/llms/azure_ai/common_utils.py
@@ -59,6 +59,17 @@ def get_token_counter(self) -> Optional[BaseTokenCounter]:
             return AzureAIAnthropicTokenCounter()
         return None
 
+    def get_models(
+        self, api_key: Optional[str] = None, api_base: Optional[str] = None
+    ) -> List[str]:
+        """
+        Return the list of models supported by Azure AI.
+
+        Azure AI doesn't have a standard model listing endpoint, so this
+        always returns an empty list; `api_key` and `api_base` are unused.
+        """
+        return []
+
     #########################################################
     # Not implemented methods
     #########################################################
diff --git a/litellm/utils.py b/litellm/utils.py
index 3e88c6fe9e3..948d6e97f4d 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -8244,6 +8244,10 @@ def get_provider_model_info(
             return litellm.ClarifaiConfig()
         elif LlmProviders.BEDROCK == provider:
             return litellm.llms.bedrock.common_utils.BedrockModelInfo()
+        elif LlmProviders.AZURE_AI == provider:
+            from litellm.llms.azure_ai.common_utils import AzureFoundryModelInfo
+
+            return AzureFoundryModelInfo(model=model)
         return None
 
     @staticmethod

From d4ca8b803bfff88e2290a5e0a7583761fe611426 Mon Sep 17 00:00:00 2001
From: Ishaan Jaffer <ishaanjaffer0324@gmail.com>
Date: Tue, 20 Jan 2026 13:30:16 -0800
Subject: [PATCH 08/13] fix Code QA issues

---
 litellm/llms/anthropic/common_utils.py |  3 +-
 litellm/llms/azure_ai/common_utils.py  |  2 +-
 litellm/proxy/proxy_config.yaml        | 58 ++++++++------------------
 3 files changed, 19 insertions(+), 44 deletions(-)

diff --git a/litellm/llms/anthropic/common_utils.py b/litellm/llms/anthropic/common_utils.py
index 3a8da2a7fd6..ccfbc36c080 100644
--- a/litellm/llms/anthropic/common_utils.py
+++ b/litellm/llms/anthropic/common_utils.py
@@ -2,7 +2,7 @@
 This file contains common utils for anthropic calls.
 """
 
-from typing import Any, Dict, List, Optional, Union
+from typing import Dict, List, Optional, Union
 
 import httpx
 
@@ -18,7 +18,6 @@
     AnthropicMcpServerTool,
 )
 from litellm.types.llms.openai import AllMessageValues
-from litellm.types.utils import TokenCountResponse
 
 
 class AnthropicError(BaseLLMException):
diff --git a/litellm/llms/azure_ai/common_utils.py b/litellm/llms/azure_ai/common_utils.py
index 2c711c36a0f..01a3f5766c6 100644
--- a/litellm/llms/azure_ai/common_utils.py
+++ b/litellm/llms/azure_ai/common_utils.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Literal, Optional
+from typing import List, Literal, Optional
 
 import litellm
 from litellm.llms.base_llm.base_utils import BaseLLMModelInfo, BaseTokenCounter
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 646a062b720..958ddbf613c 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,51 +1,27 @@
 model_list:
-  - model_name: gemini/*
+  # Anthropic direct
+  - model_name: anthropic-claude
     litellm_params:
-      model: gemini/*
-  - model_name: -claude-sonnet-4-5-20250929
-    litellm_params:
-      model: bedrock/invoke/us.anthropic.claude-sonnet-4-5-20250929-v1:0
-    model_info:
-      cache_creation_input_token_cost: 3.75e-06
-      cache_read_input_token_cost: 3e-07
-      input_cost_per_token: 3e-06
-      input_cost_per_token_above_200k_tokens: 6e-06
-      output_cost_per_token_above_200k_tokens: 2.25e-05
-      cache_creation_input_token_cost_above_200k_tokens: 7.5e-06
-      cache_read_input_token_cost_above_200k_tokens: 6e-07
-      litellm_provider: bedrock_converse
-      max_input_tokens: 200000
-      max_output_tokens: 64000
-      max_tokens: 200000
-      mode: chat
-      output_cost_per_token: 1.5e-05
-      search_context_cost_per_query:
-        search_context_size_high: 0.01
-        search_context_size_low: 0.01
-        search_context_size_medium: 0.01
-      supports_assistant_prefill: true
-      supports_computer_use: true
-      supports_function_calling: true
-      supports_pdf_input: true
-      supports_prompt_caching: true
-      supports_reasoning: true
-      supports_response_schema: true
-      supports_tool_choice: true
-      supports_vision: true
-      tool_use_system_prompt_tokens: 346
+      model: anthropic/claude-sonnet-4-20250514
+      api_key: os.environ/ANTHROPIC_API_KEY
 
-  - model_name: us.anthropic.claude-sonnet-4-20250514-v1:0
+  # Azure AI Anthropic
+  - model_name: azure-ai-claude
     litellm_params:
-      model: bedrock/converse/us.anthropic.claude-sonnet-4-20250514-v1:0
-    model_info:
-      litellm_provider: bedrock_converse
-      mode: chat
-  - model_name: claude-sonnet-4-5-20250929
+      model: azure_ai/claude-3-5-sonnet
+      api_base: https://krish-mh44t553-eastus2.services.ai.azure.com/
+      api_key: os.environ/AZURE_ANTHROPIC_API_KEY
+
+  # Azure AI Anthropic (alternate endpoint format)
+  - model_name: claude-4.5-haiku
     litellm_params:
-      model: azure_ai/claude-opus-4-5
-      api_base: https://krish-mh44t553-eastus2.services.ai.azure.com
+      model: anthropic/claude-haiku-4-5
+      api_base: https://krish-mh44t553-eastus2.services.ai.azure.com/anthropic/v1/messages
+      api_version: "2023-06-01"
       api_key: os.environ/AZURE_ANTHROPIC_API_KEY
 
+
+
 # Search Tools Configuration - Define search providers for WebSearch interception
 # search_tools:
 #   - search_tool_name: "my-perplexity-search"

From 6b568cfed4f0387409c2d839b7bbda01fa4f874a Mon Sep 17 00:00:00 2001
From: Ishaan Jaffer <ishaanjaffer0324@gmail.com>
Date: Tue, 20 Jan 2026 16:37:39 -0800
Subject: [PATCH 09/13] feat: add OAUTH handling ant

---
 litellm/types/llms/anthropic.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/litellm/types/llms/anthropic.py b/litellm/types/llms/anthropic.py
index 779a6950d92..0e687be660f 100644
--- a/litellm/types/llms/anthropic.py
+++ b/litellm/types/llms/anthropic.py
@@ -642,4 +642,8 @@ class ANTHROPIC_BETA_HEADER_VALUES(str, Enum):
 # Effort beta header constant
 ANTHROPIC_EFFORT_BETA_HEADER = "effort-2025-11-24"
 
+# OAuth constants
+ANTHROPIC_OAUTH_TOKEN_PREFIX = "sk-ant-oat"
+ANTHROPIC_OAUTH_BETA_HEADER = "oauth-2025-04-20"
+
 

From d179d8cf9b359c1fd03dcda86b615d934e891a88 Mon Sep 17 00:00:00 2001
From: Ishaan Jaffer <ishaanjaffer0324@gmail.com>
Date: Tue, 20 Jan 2026 16:42:56 -0800
Subject: [PATCH 10/13] feat: Oauth handling Ant

---
 litellm/llms/anthropic/common_utils.py        | 29 +++++++++++++++++++
 .../messages/transformation.py                |  9 +++++-
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/litellm/llms/anthropic/common_utils.py b/litellm/llms/anthropic/common_utils.py
index ccfbc36c080..18b56c199f1 100644
--- a/litellm/llms/anthropic/common_utils.py
+++ b/litellm/llms/anthropic/common_utils.py
@@ -7,6 +7,7 @@
 import httpx
 
 import litellm
+from litellm._logging import verbose_proxy_logger
 from litellm.litellm_core_utils.prompt_templates.common_utils import (
     get_file_ids_from_messages,
 )
@@ -14,12 +15,38 @@
 from litellm.llms.base_llm.chat.transformation import BaseLLMException
 from litellm.types.llms.anthropic import (
     ANTHROPIC_HOSTED_TOOLS,
+    ANTHROPIC_OAUTH_BETA_HEADER,
+    ANTHROPIC_OAUTH_TOKEN_PREFIX,
     AllAnthropicToolsValues,
     AnthropicMcpServerTool,
 )
 from litellm.types.llms.openai import AllMessageValues
 
 
+def optionally_handle_anthropic_oauth(
+    headers: dict, api_key: Optional[str]
+) -> tuple[dict, Optional[str]]:
+    """
+    Detect an Anthropic OAuth bearer token and set the headers it requires.
+
+    If `headers["authorization"]` carries an OAuth token, it becomes the api_key,
+    the OAuth beta flag is merged into `anthropic-beta` (existing values are
+    kept) and browser access is enabled. `headers` is mutated in place.
+
+    Returns: (headers, api_key), both untouched when no OAuth token is present
+    """
+    prefix = "Bearer "
+    auth_header = headers.get("authorization") or ""
+    if auth_header.startswith(f"{prefix}{ANTHROPIC_OAUTH_TOKEN_PREFIX}"):
+        api_key = auth_header[len(prefix) :]  # strip only the leading scheme
+        betas = [b.strip() for b in (headers.get("anthropic-beta") or "").split(",")]
+        if ANTHROPIC_OAUTH_BETA_HEADER not in betas:
+            betas.append(ANTHROPIC_OAUTH_BETA_HEADER)
+        headers["anthropic-beta"] = ",".join(b for b in betas if b)
+        headers["anthropic-dangerous-direct-browser-access"] = "true"
+    return headers, api_key
+
+
 class AnthropicError(BaseLLMException):
     def __init__(
         self,
@@ -371,6 +398,8 @@ def validate_environment(
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
     ) -> Dict:
+        # Check for Anthropic OAuth token in headers
+        headers, api_key = optionally_handle_anthropic_oauth(headers=headers, api_key=api_key)
         if api_key is None:
             raise litellm.AuthenticationError(
                 message="Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params. Please set `ANTHROPIC_API_KEY` in your environment vars",
diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py b/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py
index f67e4c8382c..7135102db01 100644
--- a/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py
+++ b/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py
@@ -17,7 +17,11 @@
 from litellm.types.llms.anthropic_tool_search import get_tool_search_beta_header
 from litellm.types.router import GenericLiteLLMParams
 
-from ...common_utils import AnthropicError, AnthropicModelInfo
+from ...common_utils import (
+    AnthropicError,
+    AnthropicModelInfo,
+    optionally_handle_anthropic_oauth,
+)
 
 DEFAULT_ANTHROPIC_API_BASE = "https://api.anthropic.com"
 DEFAULT_ANTHROPIC_API_VERSION = "2023-06-01"
@@ -68,8 +72,11 @@ def validate_anthropic_messages_environment(
     ) -> Tuple[dict, Optional[str]]:
         import os
 
+        # Check for Anthropic OAuth token in Authorization header
+        headers, api_key = optionally_handle_anthropic_oauth(headers=headers, api_key=api_key)
         if api_key is None:
             api_key = os.getenv("ANTHROPIC_API_KEY")
+
         if "x-api-key" not in headers and api_key:
             headers["x-api-key"] = api_key
         if "anthropic-version" not in headers:

From 6562f9dd69a3411714961ee33baf8b6fd6502ce8 Mon Sep 17 00:00:00 2001
From: Ishaan Jaffer <ishaanjaffer0324@gmail.com>
Date: Tue, 20 Jan 2026 17:02:01 -0800
Subject: [PATCH 11/13] test anthropic common utils

---
 .../anthropic/test_anthropic_common_utils.py  | 84 +++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 tests/test_litellm/llms/anthropic/test_anthropic_common_utils.py

diff --git a/tests/test_litellm/llms/anthropic/test_anthropic_common_utils.py b/tests/test_litellm/llms/anthropic/test_anthropic_common_utils.py
new file mode 100644
index 00000000000..0a397d116e7
--- /dev/null
+++ b/tests/test_litellm/llms/anthropic/test_anthropic_common_utils.py
@@ -0,0 +1,84 @@
+"""
+Tests for Anthropic OAuth token handling for Claude Code Max integration.
+"""
+
+import os
+import sys
+
+# Add litellm to path
+sys.path.insert(0, os.path.abspath("../../../../.."))
+
+# Fake OAuth token for testing (not a real secret)
+FAKE_OAUTH_TOKEN = "sk-ant-oat01-fake-token-for-testing-123456789abcdef"
+
+
+def test_oauth_detection_in_common_utils():
+    """An OAuth bearer token is extracted and both OAuth headers are set."""
+    from litellm.llms.anthropic import common_utils
+
+    request_headers = {"authorization": "Bearer " + FAKE_OAUTH_TOKEN}
+    result = common_utils.optionally_handle_anthropic_oauth(request_headers, None)
+    out_headers, out_key = result
+    assert out_key == FAKE_OAUTH_TOKEN
+    assert out_headers["anthropic-beta"] == "oauth-2025-04-20"
+    assert out_headers["anthropic-dangerous-direct-browser-access"] == "true"
+
+
+def test_oauth_integration_in_validate_environment():
+    """AnthropicModelInfo.validate_environment accepts an OAuth bearer token."""
+    from litellm.llms.anthropic.common_utils import AnthropicModelInfo
+
+    # No api_key is supplied: only the Authorization header carries the token.
+    call_kwargs = dict(
+        headers={"authorization": "Bearer " + FAKE_OAUTH_TOKEN},
+        model="claude-3-haiku-20240307",
+        messages=[{"role": "user", "content": "Hello"}],
+        optional_params={},
+        litellm_params={},
+        api_key=None,
+        api_base=None,
+    )
+    result_headers = AnthropicModelInfo().validate_environment(**call_kwargs)
+
+    # The token must surface as x-api-key, with browser access flagged.
+    assert result_headers["x-api-key"] == FAKE_OAUTH_TOKEN
+    assert result_headers["anthropic-dangerous-direct-browser-access"] == "true"
+
+
+def test_oauth_detection_in_messages_transformation():
+    """The /v1/messages pass-through config accepts an OAuth bearer token."""
+    from litellm.llms.anthropic.experimental_pass_through.messages.transformation import (
+        AnthropicMessagesConfig,
+    )
+
+    # No api_key is supplied: only the Authorization header carries the token.
+    call_kwargs = dict(
+        headers={"authorization": "Bearer " + FAKE_OAUTH_TOKEN},
+        model="claude-3-haiku-20240307",
+        messages=[{"role": "user", "content": "Hello"}],
+        optional_params={},
+        litellm_params={},
+        api_key=None,
+        api_base=None,
+    )
+    messages_config = AnthropicMessagesConfig()
+    result_headers, _ = messages_config.validate_anthropic_messages_environment(**call_kwargs)
+
+    assert result_headers["x-api-key"] == FAKE_OAUTH_TOKEN
+    assert "oauth-2025-04-20" in result_headers["anthropic-beta"]
+    assert result_headers["anthropic-dangerous-direct-browser-access"] == "true"
+
+
+def test_regular_api_keys_still_work():
+    """Regression: a non-OAuth bearer token must pass through untouched."""
+    from litellm.llms.anthropic.common_utils import optionally_handle_anthropic_oauth
+
+    regular_key = "sk-ant-api03-regular-key-123"
+    headers = {"authorization": f"Bearer {regular_key}"}
+
+    updated_headers, extracted_api_key = optionally_handle_anthropic_oauth(headers, regular_key)
+
+    # The key that was passed in is returned as-is.
+    assert extracted_api_key == regular_key
+    # Neither OAuth header may be injected for a regular key.
+    assert not {"anthropic-beta", "anthropic-dangerous-direct-browser-access"} & updated_headers.keys()
\ No newline at end of file

From a5458a6d179301d9d9daa45dcf36f16dc93defe0 Mon Sep 17 00:00:00 2001
From: Ishaan Jaffer <ishaanjaffer0324@gmail.com>
Date: Tue, 20 Jan 2026 17:13:53 -0800
Subject: [PATCH 12/13] fix code QA

---
 litellm/llms/anthropic/common_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/litellm/llms/anthropic/common_utils.py b/litellm/llms/anthropic/common_utils.py
index 18b56c199f1..cb23d21fbc9 100644
--- a/litellm/llms/anthropic/common_utils.py
+++ b/litellm/llms/anthropic/common_utils.py
@@ -7,7 +7,6 @@
 import httpx
 
 import litellm
-from litellm._logging import verbose_proxy_logger
 from litellm.litellm_core_utils.prompt_templates.common_utils import (
     get_file_ids_from_messages,
 )

From fa4dbc37aaa509b312ae2bdebbc4f149bec6b2f6 Mon Sep 17 00:00:00 2001
From: Ishaan Jaffer <ishaanjaffer0324@gmail.com>
Date: Tue, 20 Jan 2026 17:20:57 -0800
Subject: [PATCH 13/13] docs

---
 docs/my-website/docs/proxy/config_settings.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md
index f76fd214682..67fffc13e4b 100644
--- a/docs/my-website/docs/proxy/config_settings.md
+++ b/docs/my-website/docs/proxy/config_settings.md
@@ -397,6 +397,7 @@ router_settings:
 | AUDIO_SPEECH_CHUNK_SIZE | Chunk size for audio speech processing. Default is 1024
 | ANTHROPIC_API_KEY | API key for Anthropic service
 | ANTHROPIC_API_BASE | Base URL for Anthropic API. Default is https://api.anthropic.com
+| ANTHROPIC_TOKEN_COUNTING_BETA_VERSION | Beta version header for Anthropic token counting API. Default is `token-counting-2024-11-01`
 | AWS_ACCESS_KEY_ID | Access Key ID for AWS services
 | AWS_BATCH_ROLE_ARN | ARN of the AWS IAM role for batch operations
 | AWS_DEFAULT_REGION | Default AWS region for service interactions when AWS_REGION is not set
@@ -412,6 +413,8 @@ router_settings:
 | AWS_WEB_IDENTITY_TOKEN | Web identity token for AWS
 | AWS_WEB_IDENTITY_TOKEN_FILE | Path to file containing web identity token for AWS
 | AZURE_API_VERSION | Version of the Azure API being used
+| AZURE_AI_API_BASE | Base URL for Azure AI services (e.g., Azure AI Anthropic)
+| AZURE_AI_API_KEY | API key for Azure AI services (e.g., Azure AI Anthropic)
 | AZURE_AUTHORITY_HOST | Azure authority host URL
 | AZURE_CERTIFICATE_PASSWORD | Password for Azure OpenAI certificate
 | AZURE_CLIENT_ID | Client ID for Azure services