Skip to content

Commit d84ab35

Browse files
Erick Friis and frances720
authored and committed
openai: audio modality, remove sockets from unit tests (langchain-ai#27436)
1 parent 97dc578 commit d84ab35

File tree

10 files changed

+417
-279
lines changed

10 files changed

+417
-279
lines changed

libs/partners/openai/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
__pycache__
2+
tiktoken_cache

libs/partners/openai/Makefile

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,19 @@ TEST_FILE ?= tests/unit_tests/
88

99
integration_test integration_tests: TEST_FILE=tests/integration_tests/
1010

11-
test tests integration_test integration_tests:
11+
# unit tests are run with the --disable-socket flag to prevent network calls
12+
# use tiktoken cache to enable token counting without socket (internet) access
13+
test tests:
14+
mkdir -p tiktoken_cache
15+
@if [ ! -f tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 ]; then \
16+
curl -o tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken; \
17+
fi
18+
@if [ ! -f tiktoken_cache/fb374d419588a4632f3f557e76b4b70aebbca790 ]; then \
19+
curl -o tiktoken_cache/fb374d419588a4632f3f557e76b4b70aebbca790 https://openaipublic.blob.core.windows.net/encodings/o200k_base.tiktoken; \
20+
fi
21+
TIKTOKEN_CACHE_DIR=tiktoken_cache poetry run pytest --disable-socket --allow-unix-socket $(TEST_FILE)
22+
23+
integration_test integration_tests:
1224
poetry run pytest $(TEST_FILE)
1325

1426
test_watch:

libs/partners/openai/langchain_openai/chat_models/base.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,8 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
129129
invalid_tool_calls.append(
130130
make_invalid_tool_call(raw_tool_call, str(e))
131131
)
132+
if audio := _dict.get("audio"):
133+
additional_kwargs["audio"] = audio
132134
return AIMessage(
133135
content=content,
134136
additional_kwargs=additional_kwargs,
@@ -219,6 +221,17 @@ def _convert_message_to_dict(message: BaseMessage) -> dict:
219221
# If tool calls present, content null value should be None not empty string.
220222
if "function_call" in message_dict or "tool_calls" in message_dict:
221223
message_dict["content"] = message_dict["content"] or None
224+
225+
if "audio" in message.additional_kwargs:
226+
# openai doesn't support passing the data back - only the id
227+
# https://platform.openai.com/docs/guides/audio/multi-turn-conversations
228+
raw_audio = message.additional_kwargs["audio"]
229+
audio = (
230+
{"id": message.additional_kwargs["audio"]["id"]}
231+
if "id" in raw_audio
232+
else raw_audio
233+
)
234+
message_dict["audio"] = audio
222235
elif isinstance(message, SystemMessage):
223236
message_dict["role"] = "system"
224237
elif isinstance(message, FunctionMessage):

libs/partners/openai/poetry.lock

Lines changed: 311 additions & 275 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

libs/partners/openai/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ ignore_missing_imports = true
2424
[tool.poetry.dependencies]
2525
python = ">=3.9,<4.0"
2626
langchain-core = "^0.3.9"
27-
openai = "^1.40.0"
27+
openai = "^1.52.0"
2828
tiktoken = ">=0.7,<1"
2929

3030
[tool.ruff.lint]
@@ -72,6 +72,7 @@ syrupy = "^4.0.2"
7272
pytest-watcher = "^0.3.4"
7373
pytest-asyncio = "^0.21.1"
7474
pytest-cov = "^4.1.0"
75+
pytest-socket = "^0.6.0"
7576
[[tool.poetry.group.test.dependencies.numpy]]
7677
version = "^1"
7778
python = "<3.12"
Binary file not shown.

libs/partners/openai/tests/integration_tests/chat_models/test_base.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import base64
44
import json
5+
from pathlib import Path
56
from typing import Any, AsyncIterator, List, Literal, Optional, cast
67

78
import httpx
@@ -949,3 +950,71 @@ async def test_json_mode_async() -> None:
949950
assert isinstance(full, AIMessageChunk)
950951
assert isinstance(full.content, str)
951952
assert json.loads(full.content) == {"a": 1}
953+
954+
955+
def test_audio_output_modality() -> None:
956+
llm = ChatOpenAI(
957+
model="gpt-4o-audio-preview",
958+
temperature=0,
959+
model_kwargs={
960+
"modalities": ["text", "audio"],
961+
"audio": {"voice": "alloy", "format": "wav"},
962+
},
963+
)
964+
965+
history: List[BaseMessage] = [
966+
HumanMessage("Make me a short audio clip of you yelling")
967+
]
968+
969+
output = llm.invoke(history)
970+
971+
assert isinstance(output, AIMessage)
972+
assert "audio" in output.additional_kwargs
973+
974+
history.append(output)
975+
history.append(HumanMessage("Make me a short audio clip of you whispering"))
976+
977+
output = llm.invoke(history)
978+
979+
assert isinstance(output, AIMessage)
980+
assert "audio" in output.additional_kwargs
981+
982+
983+
def test_audio_input_modality() -> None:
984+
llm = ChatOpenAI(
985+
model="gpt-4o-audio-preview",
986+
temperature=0,
987+
model_kwargs={
988+
"modalities": ["text", "audio"],
989+
"audio": {"voice": "alloy", "format": "wav"},
990+
},
991+
)
992+
filepath = Path(__file__).parent / "audio_input.wav"
993+
994+
audio_data = filepath.read_bytes()
995+
b64_audio_data = base64.b64encode(audio_data).decode("utf-8")
996+
997+
history: list[BaseMessage] = [
998+
HumanMessage(
999+
[
1000+
{"type": "text", "text": "What is happening in this audio clip"},
1001+
{
1002+
"type": "input_audio",
1003+
"input_audio": {"data": b64_audio_data, "format": "wav"},
1004+
},
1005+
]
1006+
)
1007+
]
1008+
1009+
output = llm.invoke(history)
1010+
1011+
assert isinstance(output, AIMessage)
1012+
assert "audio" in output.additional_kwargs
1013+
1014+
history.append(output)
1015+
history.append(HumanMessage("Why?"))
1016+
1017+
output = llm.invoke(history)
1018+
1019+
assert isinstance(output, AIMessage)
1020+
assert "audio" in output.additional_kwargs

libs/partners/openai/tests/unit_tests/chat_models/test_base.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,12 @@ def test__convert_dict_to_message_tool_call() -> None:
162162
name="GenerateUsername",
163163
args="oops",
164164
id="call_wm0JY6CdwOMZ4eTxHWUThDNz",
165-
error="Function GenerateUsername arguments:\n\noops\n\nare not valid JSON. Received JSONDecodeError Expecting value: line 1 column 1 (char 0)", # noqa: E501
165+
error=(
166+
"Function GenerateUsername arguments:\n\noops\n\nare not "
167+
"valid JSON. Received JSONDecodeError Expecting value: line 1 "
168+
"column 1 (char 0)\nFor troubleshooting, visit: https://python"
169+
".langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE"
170+
),
166171
type="invalid_tool_call",
167172
)
168173
],

libs/partners/openai/tests/unit_tests/llms/test_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def mock_completion() -> dict:
5858
}
5959

6060

61-
@pytest.mark.parametrize("model", ["gpt-3.5-turbo-instruct", "text-davinci-003"])
61+
@pytest.mark.parametrize("model", ["gpt-3.5-turbo-instruct"])
6262
def test_get_token_ids(model: str) -> None:
6363
OpenAI(model=model).get_token_ids("foo")
6464
return

libs/partners/openai/tests/unit_tests/test_token_counts.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
_CHAT_MODELS = ["gpt-4", "gpt-4-32k", "gpt-3.5-turbo"]
1717

1818

19+
@pytest.mark.xfail(reason="Old models require different tiktoken cached file")
1920
@pytest.mark.parametrize("model", _MODELS)
2021
def test_openai_get_num_tokens(model: str) -> None:
2122
"""Test get_tokens."""

0 commit comments

Comments
 (0)