Skip to content
Merged
3 changes: 3 additions & 0 deletions docs/my-website/docs/proxy/config_settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,7 @@ router_settings:
| AUDIO_SPEECH_CHUNK_SIZE | Chunk size for audio speech processing. Default is 1024
| ANTHROPIC_API_KEY | API key for Anthropic service
| ANTHROPIC_API_BASE | Base URL for Anthropic API. Default is https://api.anthropic.com
| ANTHROPIC_TOKEN_COUNTING_BETA_VERSION | Beta version header for Anthropic token counting API. Default is `token-counting-2024-11-01`
| AWS_ACCESS_KEY_ID | Access Key ID for AWS services
| AWS_BATCH_ROLE_ARN | ARN of the AWS IAM role for batch operations
| AWS_DEFAULT_REGION | Default AWS region for service interactions when AWS_REGION is not set
Expand All @@ -412,6 +413,8 @@ router_settings:
| AWS_WEB_IDENTITY_TOKEN | Web identity token for AWS
| AWS_WEB_IDENTITY_TOKEN_FILE | Path to file containing web identity token for AWS
| AZURE_API_VERSION | Version of the Azure API being used
| AZURE_AI_API_BASE | Base URL for Azure AI services (e.g., Azure AI Anthropic)
| AZURE_AI_API_KEY | API key for Azure AI services (e.g., Azure AI Anthropic)
| AZURE_AUTHORITY_HOST | Azure authority host URL
| AZURE_CERTIFICATE_PASSWORD | Password for Azure OpenAI certificate
| AZURE_CLIENT_ID | Client ID for Azure services
Expand Down
3 changes: 3 additions & 0 deletions litellm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,9 @@
EMAIL_BUDGET_ALERT_MAX_SPEND_ALERT_PERCENTAGE = float(os.getenv("EMAIL_BUDGET_ALERT_MAX_SPEND_ALERT_PERCENTAGE", 0.8)) # 80% of max budget
############### LLM Provider Constants ###############
### ANTHROPIC CONSTANTS ###
# anthropic-beta header value used when calling Anthropic's token-counting API;
# overridable via the ANTHROPIC_TOKEN_COUNTING_BETA_VERSION environment variable.
ANTHROPIC_TOKEN_COUNTING_BETA_VERSION = os.getenv(
    "ANTHROPIC_TOKEN_COUNTING_BETA_VERSION", "token-counting-2024-11-01"
)
# Beta version string for the Anthropic Skills API (not env-configurable).
ANTHROPIC_SKILLS_API_BETA_VERSION = "skills-2025-10-02"
ANTHROPIC_WEB_SEARCH_TOOL_MAX_USES = {
"low": 1,
Expand Down
73 changes: 33 additions & 40 deletions litellm/llms/anthropic/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
This file contains common utils for anthropic calls.
"""

from typing import Any, Dict, List, Optional, Union
from typing import Dict, List, Optional, Union

import httpx

Expand All @@ -14,11 +14,36 @@
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.types.llms.anthropic import (
ANTHROPIC_HOSTED_TOOLS,
ANTHROPIC_OAUTH_BETA_HEADER,
ANTHROPIC_OAUTH_TOKEN_PREFIX,
AllAnthropicToolsValues,
AnthropicMcpServerTool,
)
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import TokenCountResponse


def optionally_handle_anthropic_oauth(
    headers: dict, api_key: Optional[str]
) -> tuple[dict, Optional[str]]:
    """
    Handle Anthropic OAuth token detection and header setup.

    If an OAuth token is detected in the Authorization header, extracts it
    and sets the required OAuth headers. `headers` is mutated in place and
    also returned for convenience.

    Args:
        headers: Request headers dict
        api_key: Current API key (may be None)

    Returns:
        Tuple of (updated headers, api_key)
    """
    # NOTE(review): lookup is case-sensitive — assumes callers normalize
    # header keys to lowercase before reaching this helper; confirm upstream.
    auth_header = headers.get("authorization", "")
    bearer_prefix = "Bearer "
    if auth_header.startswith(f"{bearer_prefix}{ANTHROPIC_OAUTH_TOKEN_PREFIX}"):
        # Strip only the leading "Bearer " marker. The previous
        # str.replace("Bearer ", "") removed EVERY occurrence of the
        # substring, which would corrupt a token that happened to contain it.
        api_key = auth_header[len(bearer_prefix):]
        headers["anthropic-beta"] = ANTHROPIC_OAUTH_BETA_HEADER
        headers["anthropic-dangerous-direct-browser-access"] = "true"
    return headers, api_key


class AnthropicError(BaseLLMException):
Expand Down Expand Up @@ -372,6 +397,8 @@ def validate_environment(
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> Dict:
# Check for Anthropic OAuth token in headers
headers, api_key = optionally_handle_anthropic_oauth(headers=headers, api_key=api_key)
if api_key is None:
raise litellm.AuthenticationError(
message="Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params. Please set `ANTHROPIC_API_KEY` in your environment vars",
Expand Down Expand Up @@ -476,45 +503,11 @@ def get_token_counter(self) -> Optional[BaseTokenCounter]:
Returns:
AnthropicTokenCounter instance for this provider.
"""
return AnthropicTokenCounter()


class AnthropicTokenCounter(BaseTokenCounter):
"""Token counter implementation for Anthropic provider."""

def should_use_token_counting_api(
self,
custom_llm_provider: Optional[str] = None,
) -> bool:
from litellm.types.utils import LlmProviders
return custom_llm_provider == LlmProviders.ANTHROPIC.value

async def count_tokens(
self,
model_to_use: str,
messages: Optional[List[Dict[str, Any]]],
contents: Optional[List[Dict[str, Any]]],
deployment: Optional[Dict[str, Any]] = None,
request_model: str = "",
) -> Optional[TokenCountResponse]:
from litellm.proxy.utils import count_tokens_with_anthropic_api

result = await count_tokens_with_anthropic_api(
model_to_use=model_to_use,
messages=messages,
deployment=deployment,
from litellm.llms.anthropic.count_tokens.token_counter import (
AnthropicTokenCounter,
)

if result is not None:
return TokenCountResponse(
total_tokens=result.get("total_tokens", 0),
request_model=request_model,
model_used=model_to_use,
tokenizer_type=result.get("tokenizer_used", ""),
original_response=result,
)

return None

return AnthropicTokenCounter()


def process_anthropic_headers(headers: Union[httpx.Headers, dict]) -> dict:
Expand Down
15 changes: 15 additions & 0 deletions litellm/llms/anthropic/count_tokens/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""
Anthropic CountTokens API implementation.
"""

from litellm.llms.anthropic.count_tokens.handler import AnthropicCountTokensHandler
from litellm.llms.anthropic.count_tokens.token_counter import AnthropicTokenCounter
from litellm.llms.anthropic.count_tokens.transformation import (
AnthropicCountTokensConfig,
)

__all__ = [
"AnthropicCountTokensHandler",
"AnthropicCountTokensConfig",
"AnthropicTokenCounter",
]
126 changes: 126 additions & 0 deletions litellm/llms/anthropic/count_tokens/handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
"""
Anthropic CountTokens API handler.

Uses httpx for HTTP requests instead of the Anthropic SDK.
"""

from typing import Any, Dict, List, Optional, Union

import httpx

import litellm
from litellm._logging import verbose_logger
from litellm.llms.anthropic.common_utils import AnthropicError
from litellm.llms.anthropic.count_tokens.transformation import (
AnthropicCountTokensConfig,
)
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client


class AnthropicCountTokensHandler(AnthropicCountTokensConfig):
    """
    Handler for Anthropic CountTokens API requests.

    Uses httpx for HTTP requests, following the same pattern as
    BedrockCountTokensHandler.
    """

    async def handle_count_tokens_request(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        api_key: str,
        api_base: Optional[str] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
    ) -> Dict[str, Any]:
        """
        Handle a CountTokens request using httpx.

        Args:
            model: The model identifier (e.g., "claude-3-5-sonnet-20241022")
            messages: The messages to count tokens for
            api_key: The Anthropic API key
            api_base: Optional custom API base URL
            timeout: Optional timeout for the request (defaults to
                litellm.request_timeout)

        Returns:
            Dictionary containing token count response

        Raises:
            AnthropicError: If the API request fails
        """
        try:
            # Reject malformed input before doing any network work.
            self.validate_request(model, messages)
            verbose_logger.debug(
                f"Processing Anthropic CountTokens request for model: {model}"
            )

            # Convert the incoming payload into Anthropic's count-tokens schema.
            payload = self.transform_request_to_count_tokens(
                model=model,
                messages=messages,
            )
            verbose_logger.debug(f"Transformed request: {payload}")

            # A caller-supplied api_base takes precedence over the default endpoint.
            url = api_base or self.get_anthropic_count_tokens_endpoint()
            verbose_logger.debug(f"Making request to: {url}")

            auth_headers = self.get_required_headers(api_key)

            # Reuse LiteLLM's shared async httpx client for this provider.
            client = get_async_httpx_client(
                llm_provider=litellm.LlmProviders.ANTHROPIC
            )
            effective_timeout = (
                litellm.request_timeout if timeout is None else timeout
            )

            response = await client.post(
                url,
                headers=auth_headers,
                json=payload,
                timeout=effective_timeout,
            )
            verbose_logger.debug(f"Response status: {response.status_code}")

            if response.status_code != 200:
                body_text = response.text
                verbose_logger.error(f"Anthropic API error: {body_text}")
                raise AnthropicError(
                    status_code=response.status_code,
                    message=body_text,
                )

            raw_response = response.json()
            verbose_logger.debug(f"Anthropic response: {raw_response}")

            transformed = self.transform_response(raw_response)
            verbose_logger.debug(f"Final response: {transformed}")
            return transformed

        except AnthropicError:
            # Already in the canonical error shape — propagate untouched.
            raise
        except httpx.HTTPStatusError as e:
            # Preserve the real upstream status code instead of flattening to 500.
            verbose_logger.error(f"HTTP error in CountTokens handler: {str(e)}")
            raise AnthropicError(
                status_code=e.response.status_code,
                message=e.response.text,
            )
        except Exception as e:
            # Anything else (transform bugs, network failures) becomes a 500.
            verbose_logger.error(f"Error in CountTokens handler: {str(e)}")
            raise AnthropicError(
                status_code=500,
                message=f"CountTokens processing error: {str(e)}",
            )
104 changes: 104 additions & 0 deletions litellm/llms/anthropic/count_tokens/token_counter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""
Anthropic Token Counter implementation using the CountTokens API.
"""

import os
from typing import Any, Dict, List, Optional

from litellm._logging import verbose_logger
from litellm.llms.anthropic.count_tokens.handler import AnthropicCountTokensHandler
from litellm.llms.base_llm.base_utils import BaseTokenCounter
from litellm.types.utils import LlmProviders, TokenCountResponse

# Global handler instance - reuse across all token counting requests
anthropic_count_tokens_handler = AnthropicCountTokensHandler()


class AnthropicTokenCounter(BaseTokenCounter):
    """Token counter implementation for Anthropic provider using the CountTokens API."""

    def should_use_token_counting_api(
        self,
        custom_llm_provider: Optional[str] = None,
    ) -> bool:
        # Only deployments routed directly to Anthropic use the remote counting API.
        return custom_llm_provider == LlmProviders.ANTHROPIC.value

    async def count_tokens(
        self,
        model_to_use: str,
        messages: Optional[List[Dict[str, Any]]],
        contents: Optional[List[Dict[str, Any]]],
        deployment: Optional[Dict[str, Any]] = None,
        request_model: str = "",
    ) -> Optional[TokenCountResponse]:
        """
        Count tokens using Anthropic's CountTokens API.

        Args:
            model_to_use: The model identifier
            messages: The messages to count tokens for
            contents: Alternative content format (not used for Anthropic)
            deployment: Deployment configuration containing litellm_params
            request_model: The original request model name

        Returns:
            TokenCountResponse with token count, or None if counting fails
        """
        from litellm.llms.anthropic.common_utils import AnthropicError

        if not messages:
            return None

        config = (deployment or {}).get("litellm_params", {})
        # Deployment-level key wins; fall back to the environment variable.
        api_key = config.get("api_key") or os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            verbose_logger.warning("No Anthropic API key found for token counting")
            return None

        def _failure(message: str, status_code: int) -> TokenCountResponse:
            # Report the failure to the caller rather than raising.
            return TokenCountResponse(
                total_tokens=0,
                request_model=request_model,
                model_used=model_to_use,
                tokenizer_type="anthropic_api",
                error=True,
                error_message=message,
                status_code=status_code,
            )

        try:
            api_result = await anthropic_count_tokens_handler.handle_count_tokens_request(
                model=model_to_use,
                messages=messages,
                api_key=api_key,
            )
            if api_result is not None:
                return TokenCountResponse(
                    total_tokens=api_result.get("input_tokens", 0),
                    request_model=request_model,
                    model_used=model_to_use,
                    tokenizer_type="anthropic_api",
                    original_response=api_result,
                )
        except AnthropicError as e:
            verbose_logger.warning(
                f"Anthropic CountTokens API error: status={e.status_code}, message={e.message}"
            )
            return _failure(e.message, e.status_code)
        except Exception as e:
            verbose_logger.warning(f"Error calling Anthropic CountTokens API: {e}")
            return _failure(str(e), 500)

        return None
Loading
Loading