@@ -20,6 +20,26 @@ import {
2020} from "../../utils/errors" ;
2121
/** Default WebSocket endpoint of the realtime transcription service. */
const defaultRealtimeUrl = "wss://api.assemblyai.com/v2/realtime/ws";

/** Pre-serialized JSON control message sent by `forceEndUtterance()`. */
const forceEndOfUtteranceMessage = `{"force_end_utterance":true}`;

/**
 * Pre-serialized JSON control message asking the server to terminate the session.
 * NOTE(review): presumably sent from `close()` — confirm against the rest of the class.
 */
const terminateSessionMessage = `{"terminate_session":true}`;
25+
/**
 * Every payload shape accepted by the transcriber's private `send()` helper:
 * plain text (used for the JSON control messages) as well as the binary
 * containers used for audio.
 *
 * NOTE(review): this looks like a local copy of the `BufferLike` union from
 * the `ws` typings — confirm before trimming redundant members.
 */
type BufferLike =
  | string
  | Buffer
  | DataView
  | number
  | ArrayBufferView
  | Uint8Array
  | ArrayBuffer
  | SharedArrayBuffer
  | ReadonlyArray<unknown>
  | ReadonlyArray<number>
  | { valueOf(): ArrayBuffer }
  | { valueOf(): SharedArrayBuffer }
  | { valueOf(): Uint8Array }
  | { valueOf(): ReadonlyArray<number> }
  | { valueOf(): string }
  | { [Symbol.toPrimitive](hint: string): string };
2343
2444export class RealtimeTranscriber {
2545 private realtimeUrl : string ;
@@ -28,6 +48,7 @@ export class RealtimeTranscriber {
2848 private encoding ?: AudioEncoding ;
2949 private apiKey ?: string ;
3050 private token ?: string ;
51+ private end_utterance_silence_threshold ?: number ;
3152 private socket ?: WebSocket ;
3253 private listeners : RealtimeListeners = { } ;
3354 private sessionTerminatedResolve ?: ( ) => void ;
@@ -37,6 +58,8 @@ export class RealtimeTranscriber {
3758 this . sampleRate = params . sampleRate ?? 16_000 ;
3859 this . wordBoost = params . wordBoost ;
3960 this . encoding = params . encoding ;
61+ this . end_utterance_silence_threshold =
62+ params . end_utterance_silence_threshold ;
4063 if ( "token" in params && params . token ) this . token = params . token ;
4164 if ( "apiKey" in params && params . apiKey ) this . apiKey = params . apiKey ;
4265
@@ -105,6 +128,18 @@ export class RealtimeTranscriber {
105128 }
106129 this . socket . binaryType = "arraybuffer" ;
107130
// As soon as the socket opens, forward the end-of-utterance silence threshold
// stored on this instance to the server. When no threshold is set, nothing is
// sent.
this.socket.onopen = () => {
  const threshold = this.end_utterance_silence_threshold;
  // The explicit null check guards untyped JavaScript callers; the declared
  // field type only allows `number | undefined`.
  if (threshold === undefined || threshold === null) {
    return;
  }
  this.configureEndUtteranceSilenceThreshold(threshold);
};
142+
108143 this . socket . onclose = ( { code, reason } : CloseEvent ) => {
109144 if ( ! reason ) {
110145 if ( code in RealtimeErrorType ) {
@@ -159,10 +194,7 @@ export class RealtimeTranscriber {
159194 }
160195
/**
 * Send a chunk of audio to the realtime service.
 *
 * Delegates to the private `send()` helper, which performs the
 * "socket is open" check before writing to the socket.
 *
 * @param audio The audio data to transmit
 * @throws Error if the socket has not been created or is not open
 */
sendAudio(audio: AudioData): void {
  this.send(audio);
}
167199
168200 stream ( ) : WritableStream < AudioData > {
@@ -173,10 +205,32 @@ export class RealtimeTranscriber {
173205 } ) ;
174206 }
175207
/**
 * Manually end an utterance.
 *
 * Sends the pre-serialized `force_end_utterance` control message over the
 * socket.
 *
 * @throws Error if the socket has not been created or is not open
 */
forceEndUtterance(): void {
  this.send(forceEndOfUtteranceMessage);
}
214+
/**
 * Configure the threshold for how long to wait before ending an utterance. Default is 700ms.
 *
 * The value is interpolated directly into a JSON control message, so it is
 * expected to be a finite number.
 *
 * @param threshold The duration of the end utterance silence threshold in milliseconds
 * @format integer
 * @throws Error if the socket has not been created or is not open
 */
configureEndUtteranceSilenceThreshold(threshold: number): void {
  this.send(`{"end_utterance_silence_threshold":${threshold}}`);
}
223+
/**
 * Single choke point for writing to the socket, used for both audio and JSON
 * control messages.
 *
 * @param data The payload to write to the socket
 * @throws Error if the socket has not been created or its `readyState` is
 *   not `WebSocket.OPEN`
 */
private send(data: BufferLike): void {
  if (!this.socket || this.socket.readyState !== WebSocket.OPEN) {
    throw new Error("Socket is not open for communication");
  }
  this.socket.send(data);
}
230+
176231 async close ( waitForSessionTermination = true ) {
177232 if ( this . socket ) {
178233 if ( this . socket . readyState === WebSocket . OPEN ) {
179- const terminateSessionMessage = `{"terminate_session": true}` ;
180234 if ( waitForSessionTermination ) {
181235 const sessionTerminatedPromise = new Promise < void > ( ( resolve ) => {
182236 this . sessionTerminatedResolve = resolve ;
0 commit comments