Skip to content

Commit f6e1f83

Browse files
committed
wip
1 parent d544eea commit f6e1f83

File tree

7 files changed

+435
-18
lines changed

7 files changed

+435
-18
lines changed

app/components/realtime-chat/realtime-chat.module.scss

+7
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,19 @@
2424
// Toolbar pinned 20px above the bottom edge of its positioned ancestor.
// It spans the full width (the 20px side padding is included in that width via
// border-box), spreads its children across the row with space-between, and
// centers them vertically.
.bottom-icons {
2525
display: flex;
2626
justify-content: space-between;
27+
align-items: center;
2728
width: 100%;
2829
position: absolute;
2930
bottom: 20px;
3031
box-sizing: border-box;
3132
padding: 0 20px;
3233
}
34+
// Flex container that centers its children on both axes, 4px apart.
.icon-center {
35+
display: flex;
36+
justify-content: center;
37+
align-items: center;
38+
gap: 4px;
39+
}
3340

3441
.icon-left,
3542
.icon-right {

app/components/realtime-chat/realtime-chat.tsx

+229-15
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,220 @@
11
import VoiceIcon from "@/app/icons/voice.svg";
22
import VoiceOffIcon from "@/app/icons/voice-off.svg";
33
import Close24Icon from "@/app/icons/close-24.svg";
4+
import PowerIcon from "@/app/icons/power.svg";
5+
46
import styles from "./realtime-chat.module.scss";
57
import clsx from "clsx";
68

7-
import { useState, useRef, useCallback } from "react";
9+
import { useState, useRef, useCallback, useEffect } from "react";
810

911
import { useAccessStore, useChatStore, ChatMessage } from "@/app/store";
1012

13+
import { IconButton } from "@/app/components/button";
14+
15+
import {
16+
Modality,
17+
RTClient,
18+
RTInputAudioItem,
19+
RTResponse,
20+
TurnDetection,
21+
} from "rt-client";
22+
import { AudioHandler } from "@/app/lib/audio";
23+
1124
/** Props accepted by the RealtimeChat component; every callback is optional. */
interface RealtimeChatProps {
1225
/** Called by handleClose, just before the realtime session is disconnected. */
onClose?: () => void;
1326
/** Called by handleStartVoice, just before a connection attempt is started. */
onStartVoice?: () => void;
1427
/** Called by handlePausedVoice. */
onPausedVoice?: () => void;
15-
// NOTE(review): the `-` marker above flags this prop as removed by this commit.
sampleRate?: number;
1628
}
1729

1830
export function RealtimeChat({
1931
onClose,
2032
onStartVoice,
2133
onPausedVoice,
22-
sampleRate = 24000,
2334
}: RealtimeChatProps) {
24-
const [isVoicePaused, setIsVoicePaused] = useState(true);
25-
const clientRef = useRef<null>(null);
2635
// Refs for the item id and the bot/user chat messages currently being handled.
// NOTE(review): nothing in the visible hunks reads or writes these — confirm
// against the rest of the file.
const currentItemId = useRef<string>("");
2736
const currentBotMessage = useRef<ChatMessage | null>();
2837
const currentUserMessage = useRef<ChatMessage | null>();
2938
// Access store is read through getState(); chat store through its hook.
const accessStore = useAccessStore.getState();
3039
const chatStore = useChatStore();
3140

41+
// True while microphone audio is being captured (set by toggleRecording).
const [isRecording, setIsRecording] = useState(false);
42+
// True once handleConnect has set up a client; reset by disconnect.
const [isConnected, setIsConnected] = useState(false);
43+
// True for the duration of a connection attempt in handleConnect.
const [isConnecting, setIsConnecting] = useState(false);
44+
// "audio" requests both text and audio modalities; any other value, text only.
const [modality, setModality] = useState("audio");
45+
// When true, handleConnect builds the client from `endpoint` and `deployment`
// instead of the fixed model name.
const [isAzure, setIsAzure] = useState(false);
46+
const [endpoint, setEndpoint] = useState("");
47+
const [deployment, setDeployment] = useState("");
48+
// true: configure "server_vad" turn detection. false: turn detection is null
// and toggleRecording commits the audio and requests a response itself.
const [useVAD, setUseVAD] = useState(true);
49+
50+
// Live realtime client (null while disconnected) and the audio handler used
// for recording and streaming playback.
const clientRef = useRef<RTClient | null>(null);
51+
const audioHandlerRef = useRef<AudioHandler | null>(null);
52+
53+
// Key passed to RTClient as `{ key: apiKey }`.
const apiKey = accessStore.openaiApiKey;
54+
55+
/**
 * Toggles the realtime session.
 *
 * When disconnected, creates an RTClient (Azure endpoint/deployment or the
 * fixed OpenAI realtime model), configures the session, starts the server
 * event listener and marks the component as connected. When already
 * connected, delegates to `disconnect`.
 *
 * Failures are logged rather than thrown, and `isConnecting` is always reset.
 */
const handleConnect = async () => {
  if (isConnected) {
    await disconnect();
    return;
  }
  // Ignore re-entrant calls while an attempt is still in flight; a second
  // attempt would overwrite (and leak) the client created by the first one.
  if (isConnecting) return;

  setIsConnecting(true);
  try {
    const client = isAzure
      ? new RTClient(new URL(endpoint), { key: apiKey }, { deployment })
      : new RTClient(
          { key: apiKey },
          { model: "gpt-4o-realtime-preview-2024-10-01" },
        );
    clientRef.current = client;

    const modalities: Modality[] =
      modality === "audio" ? ["text", "audio"] : ["text"];
    const turnDetection: TurnDetection = useVAD
      ? { type: "server_vad" }
      : null;

    // Awaited so that a rejected configuration reaches the catch below
    // instead of the UI reporting "connected" for a session that never
    // came up.
    await client.configure({
      instructions: "Hi",
      input_audio_transcription: { model: "whisper-1" },
      turn_detection: turnDetection,
      tools: [],
      temperature: 0.9,
      modalities,
    });

    // Runs for the lifetime of the session and handles its own errors,
    // so it is intentionally not awaited.
    void startResponseListener();

    setIsConnected(true);
  } catch (error) {
    console.error("Connection failed:", error);

    // Do not leave a half-initialised client behind: other handlers treat a
    // non-null clientRef as a usable session.
    const failedClient = clientRef.current;
    clientRef.current = null;
    try {
      await failedClient?.close();
    } catch (closeError) {
      console.error("Failed to close client after error:", closeError);
    }
  } finally {
    setIsConnecting(false);
  }
};
90+
91+
/**
 * Closes the current realtime session, if any.
 *
 * The client is detached from `clientRef` before it is closed, so that
 * concurrent callers (close button plus unmount cleanup) cannot close it
 * twice, and so the event listener sees a null ref and treats the resulting
 * iteration error as an expected shutdown. The connected flag is always
 * reset, even when `close()` fails. Errors are logged, never thrown.
 */
const disconnect = async () => {
  const client = clientRef.current;
  if (!client) return;
  clientRef.current = null;

  // Stop capturing microphone audio: there is no session left to send it to,
  // and the record button is disabled while disconnected.
  if (isRecording) {
    try {
      audioHandlerRef.current?.stopRecording();
    } catch (error) {
      console.error("Failed to stop recording:", error);
    }
    setIsRecording(false);
  }

  try {
    await client.close();
  } catch (error) {
    console.error("Disconnect failed:", error);
  } finally {
    // The client is unusable either way; never leave the UI stuck on
    // "Disconnect" with no client behind it.
    setIsConnected(false);
  }
};
102+
103+
/**
 * Consumes the server event stream of the current client until it ends.
 *
 * "response" events go to `handleResponse` and "input_audio" events to
 * `handleInputAudio`; every other event type is ignored. An iteration error
 * is logged only while a client is still attached to `clientRef`.
 */
const startResponseListener = async () => {
  const client = clientRef.current;
  if (!client) {
    return;
  }

  try {
    for await (const serverEvent of client.events()) {
      switch (serverEvent.type) {
        case "response":
          await handleResponse(serverEvent);
          break;
        case "input_audio":
          await handleInputAudio(serverEvent);
          break;
        default:
          break;
      }
    }
  } catch (error) {
    // With no client attached any more, the failure is not reported.
    if (!clientRef.current) {
      return;
    }
    console.error("Response iteration error:", error);
  }
};
120+
121+
/**
 * Drains one server response.
 *
 * Only assistant messages are processed. Text parts are accumulated into a
 * local message object. For audio parts, the transcript is accumulated the
 * same way while the audio chunks are streamed to the audio handler, and
 * both run concurrently.
 *
 * TODO: the accumulated message is not yet published anywhere (the earlier
 * setMessages-based UI updates are disabled).
 */
const handleResponse = async (response: RTResponse) => {
  for await (const item of response) {
    if (item.type !== "message" || item.role !== "assistant") {
      continue;
    }

    const message = { type: item.role, content: "" };

    for await (const content of item) {
      switch (content.type) {
        case "text": {
          for await (const chunk of content.textChunks()) {
            message.content += chunk;
          }
          break;
        }
        case "audio": {
          const collectTranscript = async () => {
            for await (const chunk of content.transcriptChunks()) {
              message.content += chunk;
            }
          };
          const streamAudio = async () => {
            audioHandlerRef.current?.startStreamingPlayback();
            for await (const chunk of content.audioChunks()) {
              audioHandlerRef.current?.playChunk(chunk);
            }
          };
          // Transcript first, then audio, both in flight together.
          await Promise.all([collectTranscript(), streamAudio()]);
          break;
        }
        default:
          break;
      }
    }
  }
};
161+
162+
/**
 * Handles an input-audio item: stops streaming playback, then waits for the
 * item to complete.
 *
 * TODO: the completed item is not yet added to the chat as a user message
 * (the earlier setMessages-based update is disabled).
 */
const handleInputAudio = async (item: RTInputAudioItem): Promise<void> => {
  const audioHandler = audioHandlerRef.current;
  audioHandler?.stopStreamingPlayback();

  await item.waitForCompletion();
};
173+
174+
/**
 * Starts or stops microphone capture.
 *
 * Start: requires a live client. Lazily creates and initialises the audio
 * handler, then forwards every recorded chunk to the current client.
 *
 * Stop: stops the recorder and, when server-side VAD is off, commits the
 * buffered audio and asks the client for a response.
 *
 * Errors are logged, never thrown.
 */
const toggleRecording = async () => {
  if (!isRecording) {
    // Nothing to stream to without a session. Bail out instead of falling
    // through to the stop logic.
    if (!clientRef.current) return;

    try {
      let handler = audioHandlerRef.current;
      if (!handler) {
        handler = new AudioHandler();
        await handler.initialize();
        // Publish only once initialised, so a failed initialisation cannot
        // leave an unusable handler in the ref.
        audioHandlerRef.current = handler;
      }
      await handler.startRecording(async (chunk) => {
        // The ref is read per chunk: the session may have been closed since.
        await clientRef.current?.sendAudio(chunk);
      });
      setIsRecording(true);
    } catch (error) {
      console.error("Failed to start recording:", error);
    }
    return;
  }

  try {
    audioHandlerRef.current?.stopRecording();
    // Capture has stopped at this point; reflect it right away so a failure
    // in the commit/response step below cannot leave the UI stuck on
    // "recording".
    setIsRecording(false);

    const client = clientRef.current;
    if (!useVAD && client) {
      const inputAudio = await client.commitAudio();
      await handleInputAudio(inputAudio);
      await client.generateResponse();
    }
  } catch (error) {
    console.error("Failed to stop recording:", error);
  }
};
202+
203+
// Mount: eagerly create and initialise the audio handler.
// Unmount: close the realtime session and release the audio handler.
useEffect(() => {
  let unmounted = false;

  const initAudioHandler = async () => {
    const handler = new AudioHandler();
    await handler.initialize();

    // Initialisation is asynchronous. If the component unmounted in the
    // meantime, or toggleRecording already installed a handler, storing this
    // one would leak it or replace the one in use, so release it instead.
    if (unmounted || audioHandlerRef.current) {
      await handler.close();
      return;
    }
    audioHandlerRef.current = handler;
  };

  initAudioHandler().catch(console.error);

  return () => {
    unmounted = true;
    // disconnect() logs its own failures; nothing to await during cleanup.
    void disconnect();

    const handler = audioHandlerRef.current;
    audioHandlerRef.current = null;
    handler?.close().catch(console.error);
  };
}, []);
217+
32218
// useEffect(() => {
33219
// if (
34220
// clientRef.current?.getTurnDetectionType() === "server_vad" &&
@@ -223,12 +409,16 @@ export function RealtimeChat({
223409

224410
/**
 * Notifies the parent that voice input is starting, then toggles the
 * connection via `handleConnect`.
 *
 * Both dependencies are listed so the callback never runs against a stale
 * render. With an empty dependency list it kept the first render's
 * `handleConnect`, which always saw `isConnected === false` and would open a
 * second client instead of disconnecting. `handleConnect` is recreated on
 * every render, so this callback is effectively recreated as well.
 */
const handleStartVoice = useCallback(() => {
  onStartVoice?.();
  // handleConnect reports its own failures; nothing to await here.
  void handleConnect();
}, [onStartVoice, handleConnect]);
228414

229415
/** Forwards a pause request to the optional `onPausedVoice` callback. */
const handlePausedVoice = () => {
  if (onPausedVoice) {
    onPausedVoice();
  }
};
418+
419+
/**
 * Close handler: calls the optional `onClose` callback first, then starts
 * tearing down the realtime session without waiting for it to finish.
 */
const handleClose = () => {
  if (onClose) {
    onClose();
  }
  void disconnect();
};
233423

234424
return (
@@ -241,15 +431,39 @@ export function RealtimeChat({
241431
<div className={styles["icon-center"]}></div>
242432
</div>
243433
<div className={styles["bottom-icons"]}>
244-
<div className={styles["icon-left"]}>
245-
{isVoicePaused ? (
246-
<VoiceOffIcon onClick={handleStartVoice} />
247-
) : (
248-
<VoiceIcon onClick={handlePausedVoice} />
249-
)}
434+
<div>
435+
<IconButton
436+
icon={isRecording ? <VoiceOffIcon /> : <VoiceIcon />}
437+
onClick={toggleRecording}
438+
disabled={!isConnected}
439+
bordered
440+
shadow
441+
/>
442+
</div>
443+
<div className={styles["icon-center"]}>
444+
<IconButton
445+
icon={<PowerIcon />}
446+
text={
447+
isConnecting
448+
? "Connecting..."
449+
: isConnected
450+
? "Disconnect"
451+
: "Connect"
452+
}
453+
onClick={handleConnect}
454+
disabled={isConnecting}
455+
bordered
456+
shadow
457+
/>
250458
</div>
251-
<div className={styles["icon-right"]} onClick={onClose}>
252-
<Close24Icon />
459+
<div onClick={handleClose}>
460+
<IconButton
461+
icon={<Close24Icon />}
462+
onClick={handleClose}
463+
disabled={!isConnected}
464+
bordered
465+
shadow
466+
/>
253467
</div>
254468
</div>
255469
</div>

app/icons/power.svg

+7
Loading

0 commit comments

Comments
 (0)