Skip to content
Merged
3 changes: 3 additions & 0 deletions docs/my-website/docs/proxy/config_settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,7 @@ router_settings:
| AUDIO_SPEECH_CHUNK_SIZE | Chunk size for audio speech processing. Default is 1024
| ANTHROPIC_API_KEY | API key for Anthropic service
| ANTHROPIC_API_BASE | Base URL for Anthropic API. Default is https://api.anthropic.com
| ANTHROPIC_TOKEN_COUNTING_BETA_VERSION | Beta version header for Anthropic token counting API. Default is `token-counting-2024-11-01`
| AWS_ACCESS_KEY_ID | Access Key ID for AWS services
| AWS_BATCH_ROLE_ARN | ARN of the AWS IAM role for batch operations
| AWS_DEFAULT_REGION | Default AWS region for service interactions when AWS_REGION is not set
Expand All @@ -412,6 +413,8 @@ router_settings:
| AWS_WEB_IDENTITY_TOKEN | Web identity token for AWS
| AWS_WEB_IDENTITY_TOKEN_FILE | Path to file containing web identity token for AWS
| AZURE_API_VERSION | Version of the Azure API being used
| AZURE_AI_API_BASE | Base URL for Azure AI services (e.g., Azure AI Anthropic)
| AZURE_AI_API_KEY | API key for Azure AI services (e.g., Azure AI Anthropic)
| AZURE_AUTHORITY_HOST | Azure authority host URL
| AZURE_CERTIFICATE_PASSWORD | Password for Azure OpenAI certificate
| AZURE_CLIENT_ID | Client ID for Azure services
Expand Down
3 changes: 3 additions & 0 deletions litellm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,9 @@
EMAIL_BUDGET_ALERT_MAX_SPEND_ALERT_PERCENTAGE = float(os.getenv("EMAIL_BUDGET_ALERT_MAX_SPEND_ALERT_PERCENTAGE", 0.8)) # 80% of max budget
############### LLM Provider Constants ###############
### ANTHROPIC CONSTANTS ###
# anthropic-beta header value used when calling Anthropic's token-counting API;
# overridable via the ANTHROPIC_TOKEN_COUNTING_BETA_VERSION environment variable.
ANTHROPIC_TOKEN_COUNTING_BETA_VERSION = os.getenv(
    "ANTHROPIC_TOKEN_COUNTING_BETA_VERSION", "token-counting-2024-11-01"
)
# Beta version string for the Anthropic Skills API (not env-configurable).
ANTHROPIC_SKILLS_API_BETA_VERSION = "skills-2025-10-02"
ANTHROPIC_WEB_SEARCH_TOOL_MAX_USES = {
"low": 1,
Expand Down
73 changes: 33 additions & 40 deletions litellm/llms/anthropic/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
This file contains common utils for anthropic calls.
"""

from typing import Any, Dict, List, Optional, Union
from typing import Dict, List, Optional, Union

import httpx

Expand All @@ -14,11 +14,36 @@
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.types.llms.anthropic import (
ANTHROPIC_HOSTED_TOOLS,
ANTHROPIC_OAUTH_BETA_HEADER,
ANTHROPIC_OAUTH_TOKEN_PREFIX,
AllAnthropicToolsValues,
AnthropicMcpServerTool,
)
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import TokenCountResponse


def optionally_handle_anthropic_oauth(
    headers: dict, api_key: Optional[str]
) -> tuple[dict, Optional[str]]:
    """
    Handle Anthropic OAuth token detection and header setup.

    If an OAuth token is detected in the Authorization header, extracts it
    and sets the required OAuth headers. `headers` is mutated in place and
    also returned for convenience.

    Args:
        headers: Request headers dict
        api_key: Current API key (may be None)

    Returns:
        Tuple of (updated headers, api_key)
    """
    # NOTE(review): lookup is case-sensitive — assumes callers normalize
    # header keys to lowercase before reaching this helper; confirm upstream.
    auth_header = headers.get("authorization", "")
    bearer_prefix = "Bearer "
    if auth_header.startswith(f"{bearer_prefix}{ANTHROPIC_OAUTH_TOKEN_PREFIX}"):
        # Strip only the leading "Bearer " marker. The previous
        # str.replace("Bearer ", "") removed EVERY occurrence of the
        # substring, which would corrupt a token that happened to contain it.
        api_key = auth_header[len(bearer_prefix):]
        headers["anthropic-beta"] = ANTHROPIC_OAUTH_BETA_HEADER
        headers["anthropic-dangerous-direct-browser-access"] = "true"
    return headers, api_key


class AnthropicError(BaseLLMException):
Expand Down Expand Up @@ -372,6 +397,8 @@ def validate_environment(
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> Dict:
# Check for Anthropic OAuth token in headers
headers, api_key = optionally_handle_anthropic_oauth(headers=headers, api_key=api_key)
if api_key is None:
raise litellm.AuthenticationError(
message="Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params. Please set `ANTHROPIC_API_KEY` in your environment vars",
Expand Down Expand Up @@ -476,45 +503,11 @@ def get_token_counter(self) -> Optional[BaseTokenCounter]:
Returns:
AnthropicTokenCounter instance for this provider.
"""
return AnthropicTokenCounter()


class AnthropicTokenCounter(BaseTokenCounter):
"""Token counter implementation for Anthropic provider."""

def should_use_token_counting_api(
self,
custom_llm_provider: Optional[str] = None,
) -> bool:
from litellm.types.utils import LlmProviders
return custom_llm_provider == LlmProviders.ANTHROPIC.value

async def count_tokens(
self,
model_to_use: str,
messages: Optional[List[Dict[str, Any]]],
contents: Optional[List[Dict[str, Any]]],
deployment: Optional[Dict[str, Any]] = None,
request_model: str = "",
) -> Optional[TokenCountResponse]:
from litellm.proxy.utils import count_tokens_with_anthropic_api

result = await count_tokens_with_anthropic_api(
model_to_use=model_to_use,
messages=messages,
deployment=deployment,
from litellm.llms.anthropic.count_tokens.token_counter import (
AnthropicTokenCounter,
)

if result is not None:
return TokenCountResponse(
total_tokens=result.get("total_tokens", 0),
request_model=request_model,
model_used=model_to_use,
tokenizer_type=result.get("tokenizer_used", ""),
original_response=result,
)

return None

return AnthropicTokenCounter()


def process_anthropic_headers(headers: Union[httpx.Headers, dict]) -> dict:
Expand Down
15 changes: 15 additions & 0 deletions litellm/llms/anthropic/count_tokens/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""
Anthropic CountTokens API implementation.
"""

from litellm.llms.anthropic.count_tokens.handler import AnthropicCountTokensHandler
from litellm.llms.anthropic.count_tokens.token_counter import AnthropicTokenCounter
from litellm.llms.anthropic.count_tokens.transformation import (
AnthropicCountTokensConfig,
)

__all__ = [
"AnthropicCountTokensHandler",
"AnthropicCountTokensConfig",
"AnthropicTokenCounter",
]
126 changes: 126 additions & 0 deletions litellm/llms/anthropic/count_tokens/handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
"""
Anthropic CountTokens API handler.

Uses httpx for HTTP requests instead of the Anthropic SDK.
"""

from typing import Any, Dict, List, Optional, Union

import httpx

import litellm
from litellm._logging import verbose_logger
from litellm.llms.anthropic.common_utils import AnthropicError
from litellm.llms.anthropic.count_tokens.transformation import (
AnthropicCountTokensConfig,
)
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client


class AnthropicCountTokensHandler(AnthropicCountTokensConfig):
    """
    Handler for Anthropic CountTokens API requests.

    Uses httpx for HTTP requests, following the same pattern as
    BedrockCountTokensHandler.
    """

    async def handle_count_tokens_request(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        api_key: str,
        api_base: Optional[str] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
    ) -> Dict[str, Any]:
        """
        Handle a CountTokens request using httpx.

        Args:
            model: The model identifier (e.g., "claude-3-5-sonnet-20241022")
            messages: The messages to count tokens for
            api_key: The Anthropic API key
            api_base: Optional custom API base URL
            timeout: Optional timeout for the request (defaults to
                litellm.request_timeout)

        Returns:
            Dictionary containing token count response

        Raises:
            AnthropicError: If the API request fails
        """
        try:
            # Reject malformed input before doing any network work.
            self.validate_request(model, messages)
            verbose_logger.debug(
                f"Processing Anthropic CountTokens request for model: {model}"
            )

            # Convert the incoming payload into Anthropic's count-tokens schema.
            payload = self.transform_request_to_count_tokens(
                model=model,
                messages=messages,
            )
            verbose_logger.debug(f"Transformed request: {payload}")

            # A caller-supplied api_base takes precedence over the default endpoint.
            url = api_base or self.get_anthropic_count_tokens_endpoint()
            verbose_logger.debug(f"Making request to: {url}")

            auth_headers = self.get_required_headers(api_key)

            # Reuse LiteLLM's shared async httpx client for this provider.
            client = get_async_httpx_client(
                llm_provider=litellm.LlmProviders.ANTHROPIC
            )
            effective_timeout = (
                litellm.request_timeout if timeout is None else timeout
            )

            response = await client.post(
                url,
                headers=auth_headers,
                json=payload,
                timeout=effective_timeout,
            )
            verbose_logger.debug(f"Response status: {response.status_code}")

            if response.status_code != 200:
                body_text = response.text
                verbose_logger.error(f"Anthropic API error: {body_text}")
                raise AnthropicError(
                    status_code=response.status_code,
                    message=body_text,
                )

            raw_response = response.json()
            verbose_logger.debug(f"Anthropic response: {raw_response}")

            transformed = self.transform_response(raw_response)
            verbose_logger.debug(f"Final response: {transformed}")
            return transformed

        except AnthropicError:
            # Already in the canonical error shape — propagate untouched.
            raise
        except httpx.HTTPStatusError as e:
            # Preserve the real upstream status code instead of flattening to 500.
            verbose_logger.error(f"HTTP error in CountTokens handler: {str(e)}")
            raise AnthropicError(
                status_code=e.response.status_code,
                message=e.response.text,
            )
        except Exception as e:
            # Anything else (transform bugs, network failures) becomes a 500.
            verbose_logger.error(f"Error in CountTokens handler: {str(e)}")
            raise AnthropicError(
                status_code=500,
                message=f"CountTokens processing error: {str(e)}",
            )
104 changes: 104 additions & 0 deletions litellm/llms/anthropic/count_tokens/token_counter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""
Anthropic Token Counter implementation using the CountTokens API.
"""

import os
from typing import Any, Dict, List, Optional

from litellm._logging import verbose_logger
from litellm.llms.anthropic.count_tokens.handler import AnthropicCountTokensHandler
from litellm.llms.base_llm.base_utils import BaseTokenCounter
from litellm.types.utils import LlmProviders, TokenCountResponse

# Global handler instance - reuse across all token counting requests
anthropic_count_tokens_handler = AnthropicCountTokensHandler()


class AnthropicTokenCounter(BaseTokenCounter):
    """Token counter implementation for Anthropic provider using the CountTokens API."""

    def should_use_token_counting_api(
        self,
        custom_llm_provider: Optional[str] = None,
    ) -> bool:
        # Only deployments routed directly to Anthropic use the remote counting API.
        return custom_llm_provider == LlmProviders.ANTHROPIC.value

    async def count_tokens(
        self,
        model_to_use: str,
        messages: Optional[List[Dict[str, Any]]],
        contents: Optional[List[Dict[str, Any]]],
        deployment: Optional[Dict[str, Any]] = None,
        request_model: str = "",
    ) -> Optional[TokenCountResponse]:
        """
        Count tokens using Anthropic's CountTokens API.

        Args:
            model_to_use: The model identifier
            messages: The messages to count tokens for
            contents: Alternative content format (not used for Anthropic)
            deployment: Deployment configuration containing litellm_params
            request_model: The original request model name

        Returns:
            TokenCountResponse with token count, or None if counting fails
        """
        from litellm.llms.anthropic.common_utils import AnthropicError

        if not messages:
            return None

        config = (deployment or {}).get("litellm_params", {})
        # Deployment-level key wins; fall back to the environment variable.
        api_key = config.get("api_key") or os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            verbose_logger.warning("No Anthropic API key found for token counting")
            return None

        def _failure(message: str, status_code: int) -> TokenCountResponse:
            # Report the failure to the caller rather than raising.
            return TokenCountResponse(
                total_tokens=0,
                request_model=request_model,
                model_used=model_to_use,
                tokenizer_type="anthropic_api",
                error=True,
                error_message=message,
                status_code=status_code,
            )

        try:
            api_result = await anthropic_count_tokens_handler.handle_count_tokens_request(
                model=model_to_use,
                messages=messages,
                api_key=api_key,
            )
            if api_result is not None:
                return TokenCountResponse(
                    total_tokens=api_result.get("input_tokens", 0),
                    request_model=request_model,
                    model_used=model_to_use,
                    tokenizer_type="anthropic_api",
                    original_response=api_result,
                )
        except AnthropicError as e:
            verbose_logger.warning(
                f"Anthropic CountTokens API error: status={e.status_code}, message={e.message}"
            )
            return _failure(e.message, e.status_code)
        except Exception as e:
            verbose_logger.warning(f"Error calling Anthropic CountTokens API: {e}")
            return _failure(str(e), 500)

        return None
Loading
Loading