Skip to content

Commit 1cd95ea

Browse files
Respect Anthropic API base overrides in token counter (#15516)
1 parent f55745f commit 1cd95ea

File tree

2 files changed

+129
-5
lines changed

2 files changed

+129
-5
lines changed

litellm/proxy/utils.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3762,16 +3762,25 @@ async def count_tokens_with_anthropic_api(
37623762

37633763
# Get Anthropic API key from deployment config
37643764
anthropic_api_key = None
3765+
anthropic_api_base: Optional[str] = None
37653766
if deployment is not None:
3766-
anthropic_api_key = deployment.get("litellm_params", {}).get("api_key")
3767+
litellm_params = deployment.get("litellm_params", {})
3768+
anthropic_api_key = litellm_params.get("api_key")
3769+
anthropic_api_base = litellm_params.get("api_base")
37673770

37683771
# Fallback to environment variable
37693772
if not anthropic_api_key:
37703773
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
3774+
if not anthropic_api_base:
3775+
anthropic_api_base = os.getenv("ANTHROPIC_API_BASE")
37713776

37723777
if anthropic_api_key and messages:
37733778
# Call Anthropic API directly for more accurate token counting
3774-
client = anthropic.Anthropic(api_key=anthropic_api_key)
3779+
client_kwargs: Dict[str, Any] = {"api_key": anthropic_api_key}
3780+
if anthropic_api_base:
3781+
client_kwargs["base_url"] = anthropic_api_base
3782+
3783+
client = anthropic.Anthropic(**client_kwargs)
37753784

37763785
# Call with explicit parameters to satisfy type checking
37773786
# Type ignore for now since messages come from generic dict input

tests/proxy_unit_tests/test_proxy_token_counter.py

Lines changed: 118 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,23 @@
33

44

55
import sys, os
6+
from pathlib import Path
7+
from contextlib import contextmanager
8+
import importlib
69
from dotenv import load_dotenv
710

811
load_dotenv()
912
import os
1013

1114
# this file is to test litellm/proxy
1215

13-
sys.path.insert(
14-
0, os.path.abspath("../..")
15-
) # Adds the parent directory to the system path
16+
repo_root = Path(__file__).resolve().parents[2]
17+
repo_root_str = str(repo_root)
18+
if repo_root_str not in sys.path:
19+
sys.path.insert(0, repo_root_str)
20+
for module_name in list(sys.modules.keys()):
21+
if module_name == "litellm" or module_name.startswith("litellm."):
22+
sys.modules.pop(module_name)
1623
import pytest, logging
1724
import litellm
1825
from litellm.proxy.proxy_server import token_counter
@@ -517,6 +524,114 @@ async def test_factory_anthropic_endpoint_calls_anthropic_counter():
517524
mock_anthropic_count.assert_called_once()
518525

519526

527+
def _setup_mock_anthropic_client(mock_client_instance, input_tokens: int = 123):
    """Attach a stubbed ``beta.messages.count_tokens`` to ``mock_client_instance``.

    After this call, ``mock_client_instance.beta.messages.count_tokens(...)``
    returns an object whose ``input_tokens`` attribute equals ``input_tokens``,
    regardless of the arguments it is called with.

    Args:
        mock_client_instance: Object standing in for the Anthropic client. Its
            ``beta`` attribute is overwritten with a fresh ``MagicMock``.
        input_tokens: Token count the stubbed ``count_tokens`` call reports.
    """
    from types import SimpleNamespace
    from unittest.mock import MagicMock

    # Assign the chain explicitly so any previous configuration on the
    # instance is discarded and the helper works on non-mock objects too.
    mock_client_instance.beta = MagicMock()
    mock_client_instance.beta.messages = MagicMock()
    mock_client_instance.beta.messages.count_tokens.return_value = SimpleNamespace(
        input_tokens=input_tokens
    )
536+
537+
538+
@contextmanager
def _local_proxy_utils_module(module_name: str = "litellm.proxy.utils"):
    """Yield a freshly imported copy of ``module_name``, then restore ``sys.modules``.

    On entry, every ``sys.modules`` entry belonging to the module's top-level
    package (the package itself and all of its dotted submodules) is removed
    and remembered, and ``module_name`` is imported anew. On exit, including
    when the import or the ``with`` body raises, the freshly imported package
    modules are dropped and the remembered entries are put back.

    Args:
        module_name: Dotted name of the module to import in isolation.
            Defaults to ``"litellm.proxy.utils"``.

    Yields:
        The newly imported module object.
    """
    root_package = module_name.partition(".")[0]
    submodule_prefix = root_package + "."

    def _evict_package_modules():
        # Remove the package and its submodules from sys.modules, returning
        # what was removed so the caller can restore it later.
        evicted = {}
        for loaded_name in list(sys.modules):
            if loaded_name == root_package or loaded_name.startswith(
                submodule_prefix
            ):
                evicted[loaded_name] = sys.modules.pop(loaded_name)
        return evicted

    original_modules = _evict_package_modules()
    try:
        yield importlib.import_module(module_name)
    finally:
        # Discard whatever was imported inside the context before restoring,
        # so no fresh module lingers next to the restored originals.
        _evict_package_modules()
        sys.modules.update(original_modules)
552+
553+
554+
def test_count_tokens_with_anthropic_api_respects_deployment_api_base():
    """Ensure Anthropic client honors deployment-specific api_base.

    The deployment's ``litellm_params`` carry both ``api_key`` and
    ``api_base``; the test checks that the Anthropic client is constructed
    with that key and with ``base_url`` set to the deployment's ``api_base``,
    and that the token count reported by the client is returned.
    """
    import asyncio
    from types import SimpleNamespace
    from unittest.mock import MagicMock, patch

    messages = [{"role": "user", "content": "Hello"}]
    deployment = {
        "litellm_params": {
            "api_key": "test-key",
            "api_base": "https://custom.anthropic.example",
        }
    }

    mock_client_instance = MagicMock()
    _setup_mock_anthropic_client(mock_client_instance)
    mock_anthropic_class = MagicMock(return_value=mock_client_instance)

    # Stub registered under the "anthropic" key of sys.modules for the
    # duration of the call, exposing only the `Anthropic` constructor.
    anthropic_module = SimpleNamespace(Anthropic=mock_anthropic_class)

    with _local_proxy_utils_module() as proxy_utils_module:
        with patch.dict(sys.modules, {"anthropic": anthropic_module}):
            # The function under test is a coroutine; run it to completion
            # from this synchronous test.
            result = asyncio.run(
                proxy_utils_module.count_tokens_with_anthropic_api(
                    model_to_use="claude-sonnet-4",
                    messages=messages,
                    deployment=deployment,
                )
            )

    mock_anthropic_class.assert_called_once_with(
        api_key="test-key", base_url="https://custom.anthropic.example"
    )
    mock_client_instance.beta.messages.count_tokens.assert_called_once_with(
        model="claude-sonnet-4",
        messages=messages,
        betas=["token-counting-2024-11-01"],
    )
    assert result == {
        "total_tokens": 123,
        "tokenizer_used": "anthropic_api",
    }
597+
598+
599+
def test_count_tokens_with_anthropic_api_respects_env_api_base(monkeypatch):
    """Ensure Anthropic client honors ANTHROPIC_API_BASE env fallback.

    The deployment supplies only ``api_key``; ``ANTHROPIC_API_BASE`` is set in
    the environment. The test checks that the Anthropic client is constructed
    with ``base_url`` taken from that environment variable, and that the token
    count reported by the client is returned.
    """
    import asyncio
    from types import SimpleNamespace
    from unittest.mock import MagicMock, patch

    messages = [{"role": "user", "content": "Hi"}]
    deployment = {"litellm_params": {"api_key": "test-key"}}
    monkeypatch.setenv("ANTHROPIC_API_BASE", "https://env.anthropic.example")

    mock_client_instance = MagicMock()
    _setup_mock_anthropic_client(mock_client_instance, input_tokens=456)
    mock_anthropic_class = MagicMock(return_value=mock_client_instance)

    # Stub registered under the "anthropic" key of sys.modules for the
    # duration of the call, exposing only the `Anthropic` constructor.
    anthropic_module = SimpleNamespace(Anthropic=mock_anthropic_class)

    with _local_proxy_utils_module() as proxy_utils_module:
        with patch.dict(sys.modules, {"anthropic": anthropic_module}):
            # The function under test is a coroutine; run it to completion
            # from this synchronous test.
            result = asyncio.run(
                proxy_utils_module.count_tokens_with_anthropic_api(
                    model_to_use="claude-sonnet-4",
                    messages=messages,
                    deployment=deployment,
                )
            )

    mock_anthropic_class.assert_called_once_with(
        api_key="test-key", base_url="https://env.anthropic.example"
    )
    # Mirror the deployment-api_base test: the count must come from exactly
    # one count_tokens call made with the request's model and messages.
    mock_client_instance.beta.messages.count_tokens.assert_called_once_with(
        model="claude-sonnet-4",
        messages=messages,
        betas=["token-counting-2024-11-01"],
    )
    assert result == {
        "total_tokens": 456,
        "tokenizer_used": "anthropic_api",
    }
633+
634+
520635
@pytest.mark.asyncio
521636
async def test_factory_gpt4_endpoint_does_not_call_anthropic_counter():
522637
"""Test that /v1/messages/count_tokens with GPT-4 does NOT use Anthropic counter."""

0 commit comments

Comments
 (0)