
Commit af35e5a

wip: changes with the latest openai package
Parent: 3465e71


15 files changed: +222 additions, -247 deletions


examples/realtime/app/server.py

Lines changed: 5 additions & 1 deletion
@@ -101,7 +101,11 @@ async def _serialize_event(self, event: RealtimeSessionEvent) -> dict[str, Any]:
         elif event.type == "history_updated":
             base_event["history"] = [item.model_dump(mode="json") for item in event.history]
         elif event.type == "history_added":
-            pass
+            # Provide the added item so the UI can render incrementally.
+            try:
+                base_event["item"] = event.item.model_dump(mode="json")
+            except Exception:
+                base_event["item"] = None
         elif event.type == "guardrail_tripped":
             base_event["guardrail_results"] = [
                 {"name": result.guardrail.name} for result in event.guardrail_results

examples/realtime/app/static/app.js

Lines changed: 37 additions & 8 deletions
@@ -210,6 +210,12 @@ class RealtimeDemo {
             case 'history_updated':
                 this.updateMessagesFromHistory(event.history);
                 break;
+            case 'history_added':
+                // Append just the new item without clearing the thread.
+                if (event.item) {
+                    this.addMessageFromItem(event.item);
+                }
+                break;
         }
     }
 
@@ -235,13 +241,7 @@ class RealtimeDemo {
             // Extract text from content array
             item.content.forEach(contentPart => {
                 console.log('Content part:', contentPart);
-                if (contentPart.type === 'text' && contentPart.text) {
-                    content += contentPart.text;
-                } else if (contentPart.type === 'input_text' && contentPart.text) {
-                    content += contentPart.text;
-                } else if (contentPart.type === 'input_audio' && contentPart.transcript) {
-                    content += contentPart.transcript;
-                } else if (contentPart.type === 'audio' && contentPart.transcript) {
+                if (contentPart && contentPart.transcript) {
                     content += contentPart.transcript;
                 }
             });
@@ -263,6 +263,35 @@ class RealtimeDemo {
 
         this.scrollToBottom();
     }
+
+    addMessageFromItem(item) {
+        try {
+            if (!item || item.type !== 'message') return;
+            const role = item.role;
+            let content = '';
+
+            if (Array.isArray(item.content)) {
+                for (const contentPart of item.content) {
+                    if (!contentPart || typeof contentPart !== 'object') continue;
+                    if (contentPart.type === 'text' && contentPart.text) {
+                        content += contentPart.text;
+                    } else if (contentPart.type === 'input_text' && contentPart.text) {
+                        content += contentPart.text;
+                    } else if (contentPart.type === 'input_audio' && contentPart.transcript) {
+                        content += contentPart.transcript;
+                    } else if (contentPart.type === 'audio' && contentPart.transcript) {
+                        content += contentPart.transcript;
+                    }
+                }
+            }
+
+            if (content && content.trim()) {
+                this.addMessage(role, content.trim());
+            }
+        } catch (e) {
+            console.error('Failed to add message from item:', e, item);
+        }
+    }
 
     addMessage(type, content) {
         const messageDiv = document.createElement('div');
@@ -464,4 +493,4 @@ class RealtimeDemo {
 // Initialize the demo when the page loads
 document.addEventListener('DOMContentLoaded', () => {
     new RealtimeDemo();
-});
+});

examples/realtime/app/static/favicon.ico

Whitespace-only changes.

examples/realtime/cli/demo.py

Lines changed: 1 addition & 1 deletion
@@ -280,7 +280,7 @@ async def _on_event(self, event: RealtimeSessionEvent) -> None:
             elif event.type == "history_added":
                 pass # Skip these frequent events
             elif event.type == "raw_model_event":
-                print(f"Raw model event: {_truncate_str(str(event.data), 50)}")
+                print(f"Raw model event: {_truncate_str(str(event.data), 200)}")
             else:
                 print(f"Unknown event type: {event.type}")
         except Exception as e:

src/agents/realtime/_util.py

Lines changed: 1 addition & 1 deletion
@@ -4,6 +4,6 @@
 
 
 def calculate_audio_length_ms(format: RealtimeAudioFormat | None, audio_bytes: bytes) -> float:
-    if format and format.startswith("g711"):
+    if format and isinstance(format, str) and format.startswith("g711"):
         return (len(audio_bytes) / 8000) * 1000
     return (len(audio_bytes) / 24 / 2) * 1000
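The added isinstance() check matters because session settings can now carry a typed OpenAI RealtimeAudioFormats model instead of a plain string (see the config.py change below). A minimal sketch of both call shapes, assuming the package is importable as agents:

from openai.types.realtime.realtime_audio_formats import AudioPCMU

from agents.realtime._util import calculate_audio_length_ms

# Legacy string format: one second of G.711 audio (8000 bytes at 8 kHz, 1 byte per sample).
print(calculate_audio_length_ms("g711_ulaw", b"\x00" * 8000))  # 1000.0

# New-style format object: without the isinstance() guard, format.startswith(...) would
# raise AttributeError because a Pydantic model is not a str; with the guard the call
# falls through to the default PCM branch instead of crashing.
print(calculate_audio_length_ms(AudioPCMU(type="audio/pcmu"), b"\x00" * 8000))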
Lines changed: 33 additions & 0 deletions (new file)
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+from typing import Literal
+
+from openai.types.realtime.realtime_audio_formats import (
+    AudioPCM,
+    AudioPCMA,
+    AudioPCMU,
+    RealtimeAudioFormats,
+)
+
+from ..logger import logger
+
+type LegacyRealtimeAudioFormats = Literal["pcm16", "g711_ulaw", "g711_alaw"]
+
+
+def to_realtime_audio_format(
+    input_audio_format: LegacyRealtimeAudioFormats | RealtimeAudioFormats | None,
+) -> RealtimeAudioFormats | None:
+    format: RealtimeAudioFormats | None = None
+    if input_audio_format is not None:
+        if isinstance(input_audio_format, str):
+            if input_audio_format in ["pcm16", "audio/pcm", "pcm"]:
+                format = AudioPCM(type="audio/pcm", rate=24000)
+            elif input_audio_format in ["g711_ulaw", "audio/pcmu", "pcmu"]:
+                format = AudioPCMU(type="audio/pcmu")
+            elif input_audio_format in ["g711_alaw", "audio/pcma", "pcma"]:
+                format = AudioPCMA(type="audio/pcma")
+            else:
+                logger.debug(f"Unknown input_audio_format: {input_audio_format}")
+        else:
+            format = input_audio_format
+    return format
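A short usage sketch for the helper above. The file's path was not captured in this view, so the import location below is a hypothetical placement inside agents.realtime; the mappings themselves follow the code shown:

from openai.types.realtime.realtime_audio_formats import AudioPCMU

# Hypothetical module path for the new file shown above.
from agents.realtime.audio_formats import to_realtime_audio_format

# Legacy string names are mapped onto the typed OpenAI models...
print(to_realtime_audio_format("pcm16"))      # AudioPCM(type='audio/pcm', rate=24000)
print(to_realtime_audio_format("g711_ulaw"))  # AudioPCMU(type='audio/pcmu')

# ...while already-typed values and None pass through unchanged.
print(to_realtime_audio_format(AudioPCMU(type="audio/pcmu")))  # AudioPCMU(type='audio/pcmu')
print(to_realtime_audio_format(None))                          # None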

src/agents/realtime/config.py

Lines changed: 5 additions & 2 deletions
@@ -6,6 +6,9 @@
     Union,
 )
 
+from openai.types.realtime.realtime_audio_formats import (
+    RealtimeAudioFormats as OpenAIRealtimeAudioFormats,
+)
 from typing_extensions import NotRequired, TypeAlias, TypedDict
 
 from agents.prompts import Prompt
@@ -107,10 +110,10 @@ class RealtimeSessionModelSettings(TypedDict):
     speed: NotRequired[float]
     """The speed of the model's responses."""
 
-    input_audio_format: NotRequired[RealtimeAudioFormat]
+    input_audio_format: NotRequired[RealtimeAudioFormat | OpenAIRealtimeAudioFormats]
     """The format for input audio streams."""
 
-    output_audio_format: NotRequired[RealtimeAudioFormat]
+    output_audio_format: NotRequired[RealtimeAudioFormat | OpenAIRealtimeAudioFormats]
     """The format for output audio streams."""
 
     input_audio_transcription: NotRequired[RealtimeInputAudioTranscriptionConfig]
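With the widened union, RealtimeSessionModelSettings accepts either the legacy string names or the typed OpenAI format objects. A brief sketch, with the import path assumed from this repository's layout:

from openai.types.realtime.realtime_audio_formats import AudioPCM

from agents.realtime.config import RealtimeSessionModelSettings

# Legacy string values keep working...
legacy_settings: RealtimeSessionModelSettings = {"input_audio_format": "pcm16"}

# ...and the typed OpenAI format objects are now accepted as well.
typed_settings: RealtimeSessionModelSettings = {
    "input_audio_format": AudioPCM(type="audio/pcm", rate=24000),
    "output_audio_format": AudioPCM(type="audio/pcm", rate=24000),
}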
