Skip to content

Commit 1538ee1

Browse files
ccurme and baskaryan authored
anthropic[major]: support python 3.13 (#27916)
Last week Anthropic released version 0.39.0 of its Python SDK, which enabled support for Python 3.13. This release deleted a legacy `client.count_tokens` method, which we currently access during init of the `Anthropic` LLM. Anthropic has replaced this functionality with the [client.beta.messages.count_tokens() API](anthropics/anthropic-sdk-python#726). To enable support for `anthropic >= 0.39.0` and Python 3.13, here we drop support for the legacy token counting method, and add support for the new method via `ChatAnthropic.get_num_tokens_from_messages`. To fully support the token counting API, we update the signature of `get_num_tokens_from_messages` to accept tools everywhere. --------- Co-authored-by: Bagatur <[email protected]>
1 parent 759b6ed commit 1538ee1

File tree

14 files changed

+534
-542
lines changed

14 files changed

+534
-542
lines changed

.github/scripts/check_diff.py

-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
PY_312_MAX_PACKAGES = [
3838
f"libs/partners/{integration}"
3939
for integration in [
40-
"anthropic",
4140
"chroma",
4241
"couchbase",
4342
"huggingface",

libs/community/langchain_community/chat_models/anyscale.py

+24-2
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,22 @@
55
import logging
66
import os
77
import sys
8-
from typing import TYPE_CHECKING, Any, Dict, Optional, Set
8+
import warnings
9+
from typing import (
10+
TYPE_CHECKING,
11+
Any,
12+
Callable,
13+
Dict,
14+
Optional,
15+
Sequence,
16+
Set,
17+
Type,
18+
Union,
19+
)
920

1021
import requests
1122
from langchain_core.messages import BaseMessage
23+
from langchain_core.tools import BaseTool
1224
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
1325
from pydantic import Field, SecretStr, model_validator
1426

@@ -197,10 +209,20 @@ def _get_encoding_model(self) -> tuple[str, tiktoken.Encoding]:
197209
encoding = tiktoken_.get_encoding(model)
198210
return model, encoding
199211

200-
def get_num_tokens_from_messages(self, messages: list[BaseMessage]) -> int:
212+
def get_num_tokens_from_messages(
213+
self,
214+
messages: list[BaseMessage],
215+
tools: Optional[
216+
Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]]
217+
] = None,
218+
) -> int:
201219
"""Calculate num tokens with tiktoken package.
202220
Official documentation: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb
203221
"""
222+
if tools is not None:
223+
warnings.warn(
224+
"Counting tokens in tool schemas is not yet supported. Ignoring tools."
225+
)
204226
if sys.version_info[1] <= 7:
205227
return super().get_num_tokens_from_messages(messages)
206228
model, encoding = self._get_encoding_model()

libs/community/langchain_community/chat_models/everlyai.py

+24-2
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,21 @@
44

55
import logging
66
import sys
7-
from typing import TYPE_CHECKING, Any, Dict, Optional, Set
7+
import warnings
8+
from typing import (
9+
TYPE_CHECKING,
10+
Any,
11+
Callable,
12+
Dict,
13+
Optional,
14+
Sequence,
15+
Set,
16+
Type,
17+
Union,
18+
)
819

920
from langchain_core.messages import BaseMessage
21+
from langchain_core.tools import BaseTool
1022
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
1123
from pydantic import Field, model_validator
1224

@@ -138,11 +150,21 @@ def _get_encoding_model(self) -> tuple[str, tiktoken.Encoding]:
138150
encoding = tiktoken_.get_encoding(model)
139151
return model, encoding
140152

141-
def get_num_tokens_from_messages(self, messages: list[BaseMessage]) -> int:
153+
def get_num_tokens_from_messages(
154+
self,
155+
messages: list[BaseMessage],
156+
tools: Optional[
157+
Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]]
158+
] = None,
159+
) -> int:
142160
"""Calculate num tokens with tiktoken package.
143161
144162
Official documentation: https://github.com/openai/openai-cookbook/blob/
145163
main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
164+
if tools is not None:
165+
warnings.warn(
166+
"Counting tokens in tool schemas is not yet supported. Ignoring tools."
167+
)
146168
if sys.version_info[1] <= 7:
147169
return super().get_num_tokens_from_messages(messages)
148170
model, encoding = self._get_encoding_model()

libs/community/langchain_community/chat_models/openai.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
)
4747
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
4848
from langchain_core.runnables import Runnable
49+
from langchain_core.tools import BaseTool
4950
from langchain_core.utils import (
5051
get_from_dict_or_env,
5152
get_pydantic_field_names,
@@ -644,11 +645,21 @@ def get_token_ids(self, text: str) -> List[int]:
644645
_, encoding_model = self._get_encoding_model()
645646
return encoding_model.encode(text)
646647

647-
def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
648+
def get_num_tokens_from_messages(
649+
self,
650+
messages: List[BaseMessage],
651+
tools: Optional[
652+
Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]]
653+
] = None,
654+
) -> int:
648655
"""Calculate num tokens for gpt-3.5-turbo and gpt-4 with tiktoken package.
649656
650657
Official documentation: https://github.com/openai/openai-cookbook/blob/
651658
main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
659+
if tools is not None:
660+
warnings.warn(
661+
"Counting tokens in tool schemas is not yet supported. Ignoring tools."
662+
)
652663
if sys.version_info[1] <= 7:
653664
return super().get_num_tokens_from_messages(messages)
654665
model, encoding = self._get_encoding_model()

libs/core/langchain_core/language_models/base.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import warnings
34
from abc import ABC, abstractmethod
45
from collections.abc import Mapping, Sequence
56
from functools import cache
@@ -364,17 +365,31 @@ def get_num_tokens(self, text: str) -> int:
364365
"""
365366
return len(self.get_token_ids(text))
366367

367-
def get_num_tokens_from_messages(self, messages: list[BaseMessage]) -> int:
368+
def get_num_tokens_from_messages(
369+
self,
370+
messages: list[BaseMessage],
371+
tools: Optional[Sequence] = None,
372+
) -> int:
368373
"""Get the number of tokens in the messages.
369374
370375
Useful for checking if an input fits in a model's context window.
371376
377+
**Note**: the base implementation of get_num_tokens_from_messages ignores
378+
tool schemas.
379+
372380
Args:
373381
messages: The message inputs to tokenize.
382+
tools: If provided, sequence of dict, BaseModel, function, or BaseTools
383+
to be converted to tool schemas.
374384
375385
Returns:
376386
The sum of the number of tokens across the messages.
377387
"""
388+
if tools is not None:
389+
warnings.warn(
390+
"Counting tokens in tool schemas is not yet supported. Ignoring tools.",
391+
stacklevel=2,
392+
)
378393
return sum([self.get_num_tokens(get_buffer_string([m])) for m in messages])
379394

380395
@classmethod

libs/core/tests/unit_tests/messages/test_utils.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import base64
22
import json
3+
import typing
4+
from collections.abc import Sequence
5+
from typing import Any, Callable, Optional, Union
36

47
import pytest
58

@@ -19,6 +22,7 @@
1922
merge_message_runs,
2023
trim_messages,
2124
)
25+
from langchain_core.tools import BaseTool
2226

2327

2428
@pytest.mark.parametrize("msg_cls", [HumanMessage, AIMessage, SystemMessage])
@@ -431,7 +435,15 @@ def dummy_token_counter(messages: list[BaseMessage]) -> int:
431435

432436

433437
class FakeTokenCountingModel(FakeChatModel):
434-
def get_num_tokens_from_messages(self, messages: list[BaseMessage]) -> int:
438+
def get_num_tokens_from_messages(
439+
self,
440+
messages: list[BaseMessage],
441+
tools: Optional[
442+
Sequence[
443+
Union[typing.Dict[str, Any], type, Callable, BaseTool] # noqa: UP006
444+
]
445+
] = None,
446+
) -> int:
435447
return dummy_token_counter(messages)
436448

437449

libs/langchain/tests/unit_tests/chat_models/test_base.py

-4
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
import pytest
55
from langchain_core.language_models import BaseChatModel
6-
from langchain_core.messages import HumanMessage
76
from langchain_core.prompts import ChatPromptTemplate
87
from langchain_core.runnables import RunnableConfig, RunnableSequence
98
from pydantic import SecretStr
@@ -180,9 +179,6 @@ def test_configurable_with_default() -> None:
180179
)
181180

182181
assert model_with_config.model == "claude-3-sonnet-20240229" # type: ignore[attr-defined]
183-
# Anthropic defaults to using `transformers` for token counting.
184-
with pytest.raises(ImportError):
185-
model_with_config.get_num_tokens_from_messages([(HumanMessage("foo"))]) # type: ignore[attr-defined]
186182

187183
assert model_with_config.model_dump() == { # type: ignore[attr-defined]
188184
"name": None,

libs/partners/anthropic/langchain_anthropic/chat_models.py

+36-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
)
2222

2323
import anthropic
24-
from langchain_core._api import deprecated
24+
from langchain_core._api import beta, deprecated
2525
from langchain_core.callbacks import (
2626
AsyncCallbackManagerForLLMRun,
2727
CallbackManagerForLLMRun,
@@ -1113,6 +1113,41 @@ class AnswerWithJustification(BaseModel):
11131113
else:
11141114
return llm | output_parser
11151115

1116+
@beta()
1117+
def get_num_tokens_from_messages(
1118+
self,
1119+
messages: List[BaseMessage],
1120+
tools: Optional[
1121+
Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]]
1122+
] = None,
1123+
) -> int:
1124+
"""Count tokens in a sequence of input messages.
1125+
1126+
Args:
1127+
messages: The message inputs to tokenize.
1128+
tools: If provided, sequence of dict, BaseModel, function, or BaseTools
1129+
to be converted to tool schemas.
1130+
1131+
.. versionchanged:: 0.3.0
1132+
1133+
Uses Anthropic's token counting API to count tokens in messages. See:
1134+
https://docs.anthropic.com/en/docs/build-with-claude/token-counting
1135+
"""
1136+
formatted_system, formatted_messages = _format_messages(messages)
1137+
kwargs: Dict[str, Any] = {}
1138+
if isinstance(formatted_system, str):
1139+
kwargs["system"] = formatted_system
1140+
if tools:
1141+
kwargs["tools"] = [convert_to_anthropic_tool(tool) for tool in tools]
1142+
1143+
response = self._client.beta.messages.count_tokens(
1144+
betas=["token-counting-2024-11-01"],
1145+
model=self.model,
1146+
messages=formatted_messages, # type: ignore[arg-type]
1147+
**kwargs,
1148+
)
1149+
return response.input_tokens
1150+
11161151

11171152
class AnthropicTool(TypedDict):
11181153
"""Anthropic tool definition."""

libs/partners/anthropic/langchain_anthropic/llms.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,6 @@ def validate_environment(self) -> Self:
109109
)
110110
self.HUMAN_PROMPT = anthropic.HUMAN_PROMPT
111111
self.AI_PROMPT = anthropic.AI_PROMPT
112-
self.count_tokens = self.client.count_tokens
113112
return self
114113

115114
@property
@@ -375,9 +374,11 @@ async def _astream(
375374

376375
def get_num_tokens(self, text: str) -> int:
377376
"""Calculate number of tokens."""
378-
if not self.count_tokens:
379-
raise NameError("Please ensure the anthropic package is loaded")
380-
return self.count_tokens(text)
377+
raise NotImplementedError(
378+
"Anthropic's legacy count_tokens method was removed in anthropic 0.39.0 "
379+
"and langchain-anthropic 0.3.0. Please use "
380+
"ChatAnthropic.get_num_tokens_from_messages instead."
381+
)
381382

382383

383384
@deprecated(since="0.1.0", removal="0.3.0", alternative="AnthropicLLM")

0 commit comments

Comments
 (0)