diff --git a/CHANGELOG.md b/CHANGELOG.md index 9898af4..3236f35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # Changelog +## [4.2.0] - 2024-01-11 + +### Added + +- Add `content_safety_confidence` to `TranscriptParams` & `TranscriptOptionalParams`. + +### Changed + +- The `RealtimeService` now sends audio as binary instead of a base64-encoded JSON object. + ## [4.1.0] - 2023-12-22 ### Added diff --git a/package.json b/package.json index dfde867..787dc27 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "assemblyai", - "version": "4.1.0", + "version": "4.2.0", "description": "The AssemblyAI JavaScript SDK provides an easy-to-use interface for interacting with the AssemblyAI API, which supports async and real-time transcription, as well as the latest LeMUR models.", "engines": { "node": ">=18" diff --git a/src/services/realtime/service.ts b/src/services/realtime/service.ts index 2b76a7e..8ab2235 100644 --- a/src/services/realtime/service.ts +++ b/src/services/realtime/service.ts @@ -102,6 +102,7 @@ export class RealtimeService { headers: { Authorization: this.apiKey }, }); } + this.socket.binaryType = "arraybuffer"; this.socket.onclose = ({ code, reason }: CloseEvent) => { if (!reason) { @@ -160,23 +161,7 @@ export class RealtimeService { if (!this.socket || this.socket.readyState !== WebSocket.OPEN) { throw new Error("Socket is not open for communication"); } - let audioData; - if (typeof Buffer !== "undefined") { - audioData = Buffer.from(audio).toString("base64"); - } else { - // Buffer is not available in the browser by default - // https://stackoverflow.com/a/42334410/2919731 - audioData = btoa( - new Uint8Array(audio).reduce( - (data, byte) => data + String.fromCharCode(byte), - "" - ) - ); - } - const payload = { - audio_data: audioData, - }; - this.socket.send(JSON.stringify(payload)); + this.socket.send(audio); } stream(): WritableStream { diff --git a/src/types/openapi.generated.ts b/src/types/openapi.generated.ts index b318d7a..f68cc6b 100644 --- a/src/types/openapi.generated.ts +++ b/src/types/openapi.generated.ts @@ -514,6 +514,12 @@ export type LemurBaseParams = { } ] >; + /** + * @description The model that is used for the final prompt after compression is performed. + * Defaults to "default". + * + * @default default + */ final_model?: LiteralUnion; /** * @description Custom formatted transcript data. Maximum size is the context limit of the selected model, which defaults to 100000. @@ -2305,6 +2311,8 @@ export type TranscriptOptionalParams = { boost_param?: TranscriptBoostParam; /** @description Enable [Content Moderation](https://www.assemblyai.com/docs/models/content-moderation), can be true or false */ content_safety?: boolean; + /** @description The confidence threshold for content moderation. Values must be between 25 and 100. */ + content_safety_confidence?: number; /** @description Customize how words are spelled and formatted using to and from values */ custom_spelling?: TranscriptCustomSpelling[]; /** @description Whether custom topics is enabled, either true or false */ diff --git a/tests/realtime.test.ts b/tests/realtime.test.ts index 034b44e..84fe72c 100644 --- a/tests/realtime.test.ts +++ b/tests/realtime.test.ts @@ -136,9 +136,7 @@ describe("realtime", () => { it("can send audio", async () => { const data = new ArrayBuffer(8); rt.sendAudio(data); - await expect(server).toReceiveMessage( - JSON.stringify({ audio_data: Buffer.from(data).toString("base64") }) - ); + await expect(server).toReceiveMessage(data); }); it("can send audio using stream", async () => { @@ -146,10 +144,9 @@ describe("realtime", () => { const writer = stream.writable.getWriter(); stream.readable.pipeTo(rt.stream()); await writer.ready; - writer.write(Buffer.alloc(5_000)); - await expect(server).toReceiveMessage( - JSON.stringify({ audio_data: Buffer.alloc(5_000).toString("base64") }) - ); + const data = Buffer.alloc(5_000); + writer.write(data); + await expect(server).toReceiveMessage(data); }); it("can receive transcript", () => {