From 1e6f05b1f647c80225bca171b89adc0fa6ff3611 Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Fri, 28 Nov 2025 15:01:41 -0800
Subject: [PATCH 1/3] added inworld TTS plugin and example

---
 examples/package.json              |   1 +
 examples/src/inworld_tts.ts        | 112 +++++
 plugins/inworld/api-extractor.json |  21 +
 plugins/inworld/package.json       |  52 +++
 plugins/inworld/src/index.ts       |  20 +
 plugins/inworld/src/tts.ts         | 655 +++++++++++++++++++++++++++++
 plugins/inworld/tsconfig.json      |  17 +
 plugins/inworld/tsup.config.ts     |   8 +
 pnpm-lock.yaml                     |  64 ++-
 9 files changed, 914 insertions(+), 36 deletions(-)
 create mode 100644 examples/src/inworld_tts.ts
 create mode 100644 plugins/inworld/api-extractor.json
 create mode 100644 plugins/inworld/package.json
 create mode 100644 plugins/inworld/src/index.ts
 create mode 100644 plugins/inworld/src/tts.ts
 create mode 100644 plugins/inworld/tsconfig.json
 create mode 100644 plugins/inworld/tsup.config.ts

diff --git a/examples/package.json b/examples/package.json
index edbf7796..1bcac74c 100644
--- a/examples/package.json
+++ b/examples/package.json
@@ -29,6 +29,7 @@
     "@livekit/agents-plugin-deepgram": "workspace:*",
     "@livekit/agents-plugin-elevenlabs": "workspace:*",
     "@livekit/agents-plugin-google": "workspace:*",
+    "@livekit/agents-plugin-inworld": "workspace:*",
     "@livekit/agents-plugin-livekit": "workspace:*",
     "@livekit/agents-plugin-neuphonic": "workspace:*",
     "@livekit/agents-plugin-openai": "workspace:*",
diff --git a/examples/src/inworld_tts.ts b/examples/src/inworld_tts.ts
new file mode 100644
index 00000000..faedf829
--- /dev/null
+++ b/examples/src/inworld_tts.ts
@@ -0,0 +1,112 @@
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import {
+    type JobContext,
+    type JobProcess,
+    WorkerOptions,
+    cli,
+    defineAgent,
+    metrics,
+    voice,
+  } from '@livekit/agents';
+  import * as livekit from '@livekit/agents-plugin-livekit';
+  import * as inworld from '@livekit/agents-plugin-inworld';
+  import * as silero from '@livekit/agents-plugin-silero';
+  import { BackgroundVoiceCancellation } from '@livekit/noise-cancellation-node';
+  import { fileURLToPath } from 'node:url';
+  
+  export default defineAgent({
+    prewarm: async (proc: JobProcess) => {
+      proc.userData.vad = await silero.VAD.load();
+    },
+    entry: async (ctx: JobContext) => {
+      const agent = new voice.Agent({
+        instructions:
+          "You are a helpful assistant, you can hear the user's message and respond to it in 1-2 short sentences."
+      });
+
+      // Create TTS instance
+      const tts = new inworld.TTS({
+        timestampType: 'WORD',
+        voice: 'Hades',
+        model: 'inworld-tts-1',
+        encoding: 'LINEAR16',
+        textNormalization: 'ON',
+        bitRate: 64000,
+        sampleRate: 24000,
+        speakingRate: 1.0,
+        temperature: 1.1,
+        bufferCharThreshold: 100,
+        maxBufferDelayMs: 3000
+      });
+
+      // List available voices (non-blocking)
+      tts.listVoices().then((voices: inworld.Voice[]) => {
+        console.log(`[Inworld TTS] ${voices.length} voices available in this workspace`);
+        if (voices.length > 0) {
+          console.log('[Inworld TTS] Logging information for first voice:', JSON.stringify(voices[0], null, 2));
+        }
+      }).catch((err: Error) => {
+        console.error('[Inworld TTS] Failed to list voices:', err);
+      });
+  
+      const session = new voice.AgentSession({
+        // Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
+        // See all available models at https://docs.livekit.io/agents/models/stt/
+        stt: 'assemblyai/universal-streaming:en',
+        // A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
+        // See all available models at https://docs.livekit.io/agents/models/llm/
+        llm: 'openai/gpt-4.1-mini',
+        // Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
+        // See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
+        tts,
+        // VAD and turn detection are used to determine when the user is speaking and when the agent should respond
+        // See more at https://docs.livekit.io/agents/build/turns
+        vad: ctx.proc.userData.vad! as silero.VAD,
+        turnDetection: new livekit.turnDetector.MultilingualModel(),
+        // to use realtime model, replace the stt, llm, tts and vad with the following
+        // llm: new openai.realtime.RealtimeModel(),
+        voiceOptions: {
+          // allow the LLM to generate a response while waiting for the end of turn
+          preemptiveGeneration: true,
+        },
+      });
+  
+      // timestamp handling for inworld TTS
+      session.tts!.on('alignment' as any, (data: any) => {
+        if (data.wordAlignment) {
+          const { words, starts, ends } = data.wordAlignment;
+          for (let i = 0; i < words.length; i++) {
+            console.log(`[Inworld TTS] Word: "${words[i]}", Start: ${starts[i]}, End: ${ends[i]}`);
+          }
+        }
+        if (data.characterAlignment) {
+          const { chars, starts, ends } = data.characterAlignment;
+          for (let i = 0; i < chars.length; i++) {
+            console.log(`[Inworld TTS] Char: "${chars[i]}", Start: ${starts[i]}, End: ${ends[i]}`);
+          }
+        }
+      });
+  
+      const usageCollector = new metrics.UsageCollector();
+  
+      session.on(voice.AgentSessionEventTypes.MetricsCollected, (ev) => {
+        metrics.logMetrics(ev.metrics);
+        usageCollector.collect(ev.metrics);
+      });
+  
+      await session.start({
+        agent,
+        room: ctx.room,
+        inputOptions: {
+          noiseCancellation: BackgroundVoiceCancellation(),
+        },
+      });
+  
+      session.say('Hello, how can I help you today?');
+    },
+  });
+  
+  cli.runApp(new WorkerOptions({ agent: fileURLToPath(import.meta.url) }));
+  
\ No newline at end of file
diff --git a/plugins/inworld/api-extractor.json b/plugins/inworld/api-extractor.json
new file mode 100644
index 00000000..488aaf03
--- /dev/null
+++ b/plugins/inworld/api-extractor.json
@@ -0,0 +1,21 @@
+/**
+ * Config file for API Extractor.  For more info, please visit: https://api-extractor.com
+ */
+{
+  "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",
+
+  /**
+   * Optionally specifies another JSON config file that this file extends from.  This provides a way for
+   * standard settings to be shared across multiple projects.
+   *
+   * If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains
+   * the "extends" field.  Otherwise, the first path segment is interpreted as an NPM package name, and will be
+   * resolved using NodeJS require().
+   *
+   * SUPPORTED TOKENS: none
+   * DEFAULT VALUE: ""
+   */
+  "extends": "../../api-extractor-shared.json",
+  "mainEntryPointFilePath": "./dist/index.d.ts"
+}
+
diff --git a/plugins/inworld/package.json b/plugins/inworld/package.json
new file mode 100644
index 00000000..f2c13066
--- /dev/null
+++ b/plugins/inworld/package.json
@@ -0,0 +1,52 @@
+{
+  "name": "@livekit/agents-plugin-inworld",
+  "version": "0.1.0",
+  "description": "Inworld plugin for LiveKit Node Agents",
+  "main": "dist/index.js",
+  "require": "dist/index.cjs",
+  "types": "dist/index.d.ts",
+  "exports": {
+    "import": {
+      "types": "./dist/index.d.ts",
+      "default": "./dist/index.js"
+    },
+    "require": {
+      "types": "./dist/index.d.cts",
+      "default": "./dist/index.cjs"
+    }
+  },
+  "author": "LiveKit",
+  "type": "module",
+  "repository": "git@github.com:livekit/agents-js.git",
+  "license": "Apache-2.0",
+  "files": [
+    "dist",
+    "src",
+    "README.md"
+  ],
+  "scripts": {
+    "build": "tsup --onSuccess \"pnpm build:types\"",
+    "build:types": "tsc --declaration --emitDeclarationOnly && node ../../scripts/copyDeclarationOutput.js",
+    "clean": "rm -rf dist",
+    "clean:build": "pnpm clean && pnpm build",
+    "lint": "eslint -f unix \"src/**/*.{ts,js}\"",
+    "api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript",
+    "api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose"
+  },
+  "devDependencies": {
+    "@livekit/agents": "workspace:*",
+    "@livekit/rtc-node": "^0.13.12",
+    "@microsoft/api-extractor": "^7.35.0",
+    "@types/ws": "^8.5.10",
+    "tsup": "^8.3.5",
+    "typescript": "^5.0.0"
+  },
+  "dependencies": {
+    "ws": "^8.16.0"
+  },
+  "peerDependencies": {
+    "@livekit/agents": "workspace:*",
+    "@livekit/rtc-node": "^0.13.12"
+  }
+}
+
diff --git a/plugins/inworld/src/index.ts b/plugins/inworld/src/index.ts
new file mode 100644
index 00000000..932950e2
--- /dev/null
+++ b/plugins/inworld/src/index.ts
@@ -0,0 +1,20 @@
+// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { Plugin } from '@livekit/agents';
+import { TTS } from './tts.js';
+
+export * from './tts.js';
+
+class InworldPlugin extends Plugin {
+  constructor() {
+    super({
+      title: 'Inworld',
+      version: '0.1.0',
+      package: '@livekit/agents-plugin-inworld',
+    });
+  }
+}
+
+Plugin.registerPlugin(new InworldPlugin());
+
diff --git a/plugins/inworld/src/tts.ts b/plugins/inworld/src/tts.ts
new file mode 100644
index 00000000..ee0f7f78
--- /dev/null
+++ b/plugins/inworld/src/tts.ts
@@ -0,0 +1,655 @@
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import {
+  AudioByteStream,
+  log,
+  shortuuid,
+  tts,
+  tokenize,
+} from '@livekit/agents';
+import { type RawData, WebSocket } from 'ws';
+
+const DEFAULT_BIT_RATE = 64000;
+const DEFAULT_ENCODING = 'LINEAR16';
+const DEFAULT_MODEL = 'inworld-tts-1';
+const DEFAULT_SAMPLE_RATE = 24000; 
+const DEFAULT_URL = 'https://api.inworld.ai/';
+const DEFAULT_WS_URL = 'wss://api.inworld.ai/';
+const DEFAULT_VOICE = 'Ashley';
+const DEFAULT_TEMPERATURE = 1.1;
+const DEFAULT_SPEAKING_RATE = 1.0;
+const DEFAULT_BUFFER_CHAR_THRESHOLD = 100;
+const DEFAULT_MAX_BUFFER_DELAY_MS = 3000;
+const NUM_CHANNELS = 1;
+
+export type Encoding = 'LINEAR16' | 'MP3' | 'OGG_OPUS' | 'ALAW' | 'MULAW' | 'FLAC' | string;
+export type TimestampType = 'TIMESTAMP_TYPE_UNSPECIFIED' | 'WORD' | 'CHARACTER';
+export type TextNormalization = 'APPLY_TEXT_NORMALIZATION_UNSPECIFIED' | 'ON' | 'OFF';
+
+export interface TTSOptions {
+  apiKey?: string;
+  voice: string;
+  model: string;
+  encoding: Encoding;
+  bitRate: number;
+  sampleRate: number;
+  speakingRate: number;
+  temperature: number;
+  timestampType?: TimestampType;
+  textNormalization?: TextNormalization;
+  bufferCharThreshold: number;
+  maxBufferDelayMs: number;
+  baseURL: string;
+  wsURL: string;
+  tokenizer?: tokenize.SentenceTokenizer;
+}
+
+// API request/response types
+interface AudioConfig {
+  audioEncoding: Encoding;
+  sampleRateHertz: number;
+  bitrate: number;
+  speakingRate: number;
+  temperature?: number;
+}
+
+interface SynthesizeRequest {
+  text: string;
+  voiceId: string;
+  modelId: string;
+  audioConfig: AudioConfig;
+  temperature: number;
+  timestampType?: TimestampType;
+  applyTextNormalization?: TextNormalization;
+}
+
+interface CreateContextConfig {
+  voiceId: string;
+  modelId: string;
+  audioConfig: AudioConfig;
+  temperature: number;
+  bufferCharThreshold: number;
+  maxBufferDelayMs: number;
+  timestampType?: TimestampType;
+  applyTextNormalization?: TextNormalization;
+}
+
+interface WordAlignment {
+  words: string[];
+  wordStartTimeSeconds: number[];
+  wordEndTimeSeconds: number[];
+}
+
+interface CharacterAlignment {
+  characters: string[];
+  characterStartTimeSeconds: number[];
+  characterEndTimeSeconds: number[];
+}
+
+interface TimestampInfo {
+  wordAlignment?: WordAlignment;
+  characterAlignment?: CharacterAlignment;
+}
+
+interface AudioChunk {
+  audioContent?: string;
+  timestampInfo?: TimestampInfo;
+}
+
+interface InworldResult {
+  contextId?: string;
+  contextCreated?: boolean;
+  contextClosed?: boolean;
+  audioChunk?: AudioChunk;
+  audioContent?: string;
+  status?: { code: number; message: string };
+}
+
+interface InworldMessage {
+  result?: InworldResult;
+  contextId?: string;
+  error?: { message: string };
+}
+
+export interface Voice {
+  voiceId: string;
+  displayName: string;
+  description: string;
+  languages: string[];
+  tags: string[];
+}
+
+interface ListVoicesResponse {
+  voices: Voice[];
+}
+
+const defaultTTSOptionsBase: Omit<TTSOptions, 'tokenizer'> = {
+  apiKey: process.env.INWORLD_API_KEY,
+  voice: DEFAULT_VOICE,
+  model: DEFAULT_MODEL,
+  encoding: DEFAULT_ENCODING as Encoding,
+  bitRate: DEFAULT_BIT_RATE,
+  sampleRate: DEFAULT_SAMPLE_RATE,
+  speakingRate: DEFAULT_SPEAKING_RATE,
+  temperature: DEFAULT_TEMPERATURE,
+  bufferCharThreshold: DEFAULT_BUFFER_CHAR_THRESHOLD,
+  maxBufferDelayMs: DEFAULT_MAX_BUFFER_DELAY_MS,
+  baseURL: DEFAULT_URL,
+  wsURL: DEFAULT_WS_URL,
+};
+
+const MAX_RETRIES = 2;
+const BASE_DELAY_MS = 1000;
+
+class WSConnectionPool {
+  #ws?: WebSocket;
+  #url: string;
+  #auth: string;
+  #connecting?: Promise<WebSocket>;
+  #listeners: Map<string, (msg: InworldMessage) => void> = new Map();
+  #logger = log();
+
+  constructor(url: string, auth: string) {
+    this.#url = url;
+    this.#auth = auth;
+  }
+
+  async getConnection(): Promise<WebSocket> {
+    if (this.#ws && this.#ws.readyState === WebSocket.OPEN) {
+      return this.#ws;
+    }
+
+    if (this.#connecting) {
+      return this.#connecting;
+    }
+
+    this.#connecting = this.#connectWithRetry();
+    return this.#connecting;
+  }
+
+  async #connectWithRetry(): Promise<WebSocket> {
+    let lastError: Error | undefined;
+
+    for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
+      try {
+        return await this.#attemptConnection();
+      } catch (err) {
+        lastError = err instanceof Error ? err : new Error(String(err));
+        this.#connecting = undefined;
+
+        if (attempt < MAX_RETRIES) {
+          // Exponential backoff: 1s, 2s
+          const delayMs = BASE_DELAY_MS * Math.pow(2, attempt);
+          this.#logger.warn(
+            { error: lastError, attempt: attempt + 1, maxRetries: MAX_RETRIES + 1, delayMs },
+            `Failed to connect to Inworld, retrying in ${delayMs}ms`,
+          );
+          await new Promise((resolve) => setTimeout(resolve, delayMs));
+        }
+      }
+    }
+
+    throw new Error(
+      `Failed to connect to Inworld after ${MAX_RETRIES + 1} attempts: ${lastError?.message}`,
+    );
+  }
+
+  #attemptConnection(): Promise<WebSocket> {
+    return new Promise((resolve, reject) => {
+      const wsUrl = new URL('tts/v1/voice:streamBidirectional', this.#url);
+      // Ensure protocol is wss
+      if (wsUrl.protocol === 'https:') wsUrl.protocol = 'wss:';
+      else if (wsUrl.protocol === 'http:') wsUrl.protocol = 'ws:';
+
+      const ws = new WebSocket(wsUrl.toString(), {
+        headers: { Authorization: this.#auth },
+      });
+
+      ws.on('open', () => {
+        this.#ws = ws;
+        this.#connecting = undefined;
+        resolve(ws);
+      });
+
+      ws.on('error', (err) => {
+        if (this.#connecting) {
+          reject(err);
+        } else {
+          this.#logger.error({ err }, 'Inworld WebSocket error');
+        }
+      });
+
+      ws.on('close', () => {
+        this.#ws = undefined;
+        this.#connecting = undefined;
+      });
+
+      ws.on('message', (data: RawData) => {
+        try {
+          const json = JSON.parse(data.toString()) as InworldMessage;
+          const result = json.result;
+          if (result) {
+            // Try to find contextId in result or top level
+            const contextId = result.contextId || json.contextId;
+            if (contextId && this.#listeners.has(contextId)) {
+              this.#listeners.get(contextId)!(json);
+            }
+          } else if (json.error) {
+             this.#logger.warn({ error: json.error }, 'Inworld received error message');
+          }
+        } catch (e) {
+          this.#logger.warn({ error: e }, 'Failed to parse Inworld WebSocket message');
+        }
+      });
+    });
+  }
+
+  registerListener(contextId: string, cb: (msg: InworldMessage) => void) {
+    this.#listeners.set(contextId, cb);
+  }
+
+  unregisterListener(contextId: string) {
+    this.#listeners.delete(contextId);
+  }
+
+  close() {
+    if (this.#ws) {
+      this.#ws.close();
+      this.#ws = undefined;
+    }
+  }
+}
+
+export class TTS extends tts.TTS {
+  #opts: TTSOptions;
+  #pool: WSConnectionPool;
+  #authorization: string;
+  label = 'inworld.TTS';
+
+  constructor(opts: Partial<TTSOptions> = {}) {
+    const mergedOpts = { ...defaultTTSOptionsBase, ...opts };
+    if (!mergedOpts.apiKey) {
+      throw new Error('Inworld API key required. Set INWORLD_API_KEY or provide apiKey.');
+    }
+
+    super(mergedOpts.sampleRate, NUM_CHANNELS, {
+      streaming: true,
+    });
+
+    this.#opts = mergedOpts as TTSOptions;
+    if (!this.#opts.tokenizer) {
+      this.#opts.tokenizer = new tokenize.basic.SentenceTokenizer();
+    }
+    this.#authorization = `Basic ${mergedOpts.apiKey}`;
+    this.#pool = new WSConnectionPool(this.#opts.wsURL, this.#authorization);
+  }
+
+  get pool(): WSConnectionPool {
+    return this.#pool;
+  }
+
+  get authorization(): string {
+    return this.#authorization;
+  }
+
+  /**
+   * List all available voices.
+   * @param language - Optional ISO 639-1 language code to filter voices (e.g., 'en', 'es', 'fr')
+   */
+  async listVoices(language?: string): Promise<Voice[]> {
+    const url = new URL('tts/v1/voices', this.#opts.baseURL);
+    if (language) {
+      url.searchParams.set('filter', `language=${language}`);
+    }
+
+    const response = await fetch(url.toString(), {
+      method: 'GET',
+      headers: {
+        Authorization: this.#authorization,
+      },
+    });
+
+    if (!response.ok) {
+      const errorBody = await response.json().catch(() => ({}));
+      throw new Error(
+        `Inworld API error: ${response.status} ${response.statusText}${errorBody.message ? ` - ${errorBody.message}` : ''}`,
+      );
+    }
+
+    const data = (await response.json()) as ListVoicesResponse;
+    return data.voices;
+  }
+
+  synthesize(text: string): tts.ChunkedStream {
+    return new ChunkedStream(this, text, this.#opts);
+  }
+
+  stream(): tts.SynthesizeStream {
+    return new SynthesizeStream(this, this.#opts);
+  }
+
+  updateOptions(opts: Partial<TTSOptions>) {
+    this.#opts = { ...this.#opts, ...opts };
+    if (opts.apiKey) {
+        this.#authorization = `Basic ${opts.apiKey}`;
+        // If API key changes, we might need to reset the pool or create a new one?
+        // For now, assume WS url doesn't change or we just create new pool if needed.
+        // But existing pool has hardcoded auth in constructor.
+        // Re-creating pool is safer.
+        this.#pool.close();
+        this.#pool = new WSConnectionPool(this.#opts.wsURL, this.#authorization);
+    }
+  }
+
+  async close() {
+      this.#pool.close();
+  }
+}
+
+class ChunkedStream extends tts.ChunkedStream {
+  #opts: TTSOptions;
+  #tts: TTS;
+  label = 'inworld.ChunkedStream';
+
+  constructor(ttsInstance: TTS, text: string, opts: TTSOptions) {
+    super(text, ttsInstance);
+    this.#tts = ttsInstance;
+    this.#opts = opts;
+  }
+
+  protected async run() {
+    const audioConfig: AudioConfig = {
+      audioEncoding: this.#opts.encoding,
+      bitrate: this.#opts.bitRate,
+      sampleRateHertz: this.#opts.sampleRate,
+      temperature: this.#opts.temperature,
+      speakingRate: this.#opts.speakingRate,
+    };
+
+    const bodyParams: SynthesizeRequest = {
+      text: this.inputText,
+      voiceId: this.#opts.voice,
+      modelId: this.#opts.model,
+      audioConfig: audioConfig,
+      temperature: this.#opts.temperature,
+      timestampType: this.#opts.timestampType,
+      applyTextNormalization: this.#opts.textNormalization,
+    };
+
+    const url = new URL('tts/v1/voice:stream', this.#opts.baseURL);
+    
+    const response = await fetch(url.toString(), {
+      method: 'POST',
+      headers: {
+        Authorization: this.#tts.authorization,
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify(bodyParams),
+    });
+
+    if (!response.ok) {
+      throw new Error(`Inworld API error: ${response.status} ${response.statusText}`);
+    }
+
+    if (!response.body) {
+      throw new Error('No response body');
+    }
+
+    const reader = response.body.getReader();
+    const decoder = new TextDecoder();
+    let buffer = '';
+    
+    const requestId = shortuuid();
+    const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);
+
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+
+      buffer += decoder.decode(value, { stream: true });
+      const lines = buffer.split('\n');
+      buffer = lines.pop() || '';
+
+      for (const line of lines) {
+            if (line.trim()) {
+              try {
+                const data = JSON.parse(line);
+                if (data.result) {
+                   if (data.result.audioContent) {
+                     const audio = Buffer.from(data.result.audioContent, 'base64');
+                     
+                     let pcmData = audio;
+                     if (audio.length > 44 && audio.subarray(0, 4).toString() === 'RIFF') {
+                         // This is a WAV header, skip 44 bytes
+                         pcmData = audio.subarray(44);
+                     }
+                     
+                     for (const frame of bstream.write(
+                       pcmData.buffer.slice(pcmData.byteOffset, pcmData.byteOffset + pcmData.byteLength),
+                     )) {
+                       this.queue.put({
+                         requestId,
+                         segmentId: requestId,
+                         frame,
+                         final: false,
+                       });
+                     }
+                   }
+                } else if (data.error) {
+                   throw new Error(data.error.message);
+                }
+              } catch (e) {
+                log().warn({ error: e, line }, 'Failed to parse Inworld chunk');
+              }
+            }
+      }
+    }
+    
+    // Flush remaining frames
+    for (const frame of bstream.flush()) {
+      this.queue.put({
+        requestId,
+        segmentId: requestId,
+        frame,
+        final: false,
+      });
+    }
+  }
+}
+
+class SynthesizeStream extends tts.SynthesizeStream {
+  #opts: TTSOptions;
+  #tts: TTS;
+  #contextId: string;
+  label = 'inworld.SynthesizeStream';
+
+  constructor(ttsInstance: TTS, opts: TTSOptions) {
+    super(ttsInstance);
+    this.#tts = ttsInstance;
+    this.#opts = opts;
+    this.#contextId = shortuuid();
+  }
+
+  protected async run() {
+    const ws = await this.#tts.pool.getConnection();
+    const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);
+    const tokenizerStream = this.#opts.tokenizer!.stream();
+
+    let resolveProcessing: () => void;
+    let rejectProcessing: (err: Error) => void;
+    const processing = new Promise<void>((resolve, reject) => {
+        resolveProcessing = resolve;
+        rejectProcessing = reject;
+    });
+
+    const handleMessage = (msg: InworldMessage) => {
+      const result = msg.result;
+      if (!result) return;
+
+      if (result.contextCreated) {
+          // context created
+      } else if (result.contextClosed) {
+          resolveProcessing();
+      } else if (result.audioChunk) {
+          if (result.audioChunk.timestampInfo) {
+              // Log word timestamps if available
+              const tsInfo = result.audioChunk.timestampInfo;
+              if (tsInfo.wordAlignment) {
+                  const words = tsInfo.wordAlignment.words || [];
+                  const starts = tsInfo.wordAlignment.wordStartTimeSeconds || [];
+                  const ends = tsInfo.wordAlignment.wordEndTimeSeconds || [];
+                  
+                  for (let i = 0; i < words.length; i++) {
+                      // console.log(`[Inworld TTS] Word: "${words[i]}", Start: ${starts[i]}, End: ${ends[i]}`);
+                  }
+                  
+                  (this.#tts as any).emit('alignment', {
+                      requestId: this.#contextId,
+                      segmentId: this.#contextId,
+                      wordAlignment: { words, starts, ends }
+                  });
+              }
+              
+              if (tsInfo.characterAlignment) {
+                  const chars = tsInfo.characterAlignment.characters || [];
+                  const starts = tsInfo.characterAlignment.characterStartTimeSeconds || [];
+                  const ends = tsInfo.characterAlignment.characterEndTimeSeconds || [];
+                  
+                  (this.#tts as any).emit('alignment', {
+                      requestId: this.#contextId,
+                      segmentId: this.#contextId,
+                      characterAlignment: { chars, starts, ends }
+                  });
+              }
+          }
+
+          if (result.audioChunk.audioContent) {
+              // Some servers may return either nested audioContent or top-level
+              const b64Content = result.audioChunk.audioContent || result.audioContent;
+              if (b64Content) {
+                  const audio = Buffer.from(b64Content, 'base64');
+                  let pcmData = audio;
+                  if (audio.length > 44 && audio.subarray(0, 4).toString() === 'RIFF') {
+                       // This is a WAV header, skip 44 bytes
+                       pcmData = audio.subarray(44);
+                  }
+                  for (const frame of bstream.write(
+                    pcmData.buffer.slice(pcmData.byteOffset, pcmData.byteOffset + pcmData.byteLength),
+                  )) {
+                      this.queue.put({
+                          requestId: this.#contextId,
+                          segmentId: this.#contextId,
+                          frame,
+                          final: false,
+                      });
+                  }
+              }
+          }
+      } else if (result.status && result.status.code !== 0) {
+          const error = new Error(`Inworld stream error: ${result.status.message}`);
+          rejectProcessing(error);
+      }
+    };
+
+    this.#tts.pool.registerListener(this.#contextId, handleMessage);
+
+    const sendLoop = async () => {
+        for await (const ev of tokenizerStream) {
+            await this.#sendText(ws, ev.token);
+        }
+    };
+    const sendPromise = sendLoop();
+
+    try {
+      await this.#createContext(ws);
+      
+      for await (const text of this.input) {
+        if (text === tts.SynthesizeStream.FLUSH_SENTINEL) {
+            tokenizerStream.flush();
+        } else {
+            tokenizerStream.pushText(text);
+        }
+      }
+      tokenizerStream.endInput();
+      await sendPromise;
+
+      await this.#flushContext(ws);
+      await this.#closeContext(ws);
+      await processing;
+      
+      // Flush remaining frames
+      for (const frame of bstream.flush()) {
+          this.queue.put({
+              requestId: this.#contextId,
+              segmentId: this.#contextId,
+              frame,
+              final: false,
+          });
+      }
+
+    } catch (e) {
+        log().error({ error: e }, 'Error in SynthesizeStream run');
+        throw e;
+    } finally {
+      this.#tts.pool.unregisterListener(this.#contextId);
+    }
+  }
+
+  #send(ws: WebSocket, data: object): Promise<void> {
+    return new Promise((resolve, reject) => {
+      if (ws.readyState !== WebSocket.OPEN) {
+        reject(new Error('WebSocket is not open'));
+        return;
+      }
+      ws.send(JSON.stringify(data), (err) => {
+        if (err) {
+          reject(err);
+        } else {
+          resolve();
+        }
+      });
+    });
+  }
+
+  #createContext(ws: WebSocket): Promise<void> {
+    const config: CreateContextConfig = {
+      voiceId: this.#opts.voice,
+      modelId: this.#opts.model,
+      audioConfig: {
+        audioEncoding: this.#opts.encoding,
+        sampleRateHertz: this.#opts.sampleRate,
+        bitrate: this.#opts.bitRate,
+        speakingRate: this.#opts.speakingRate,
+      },
+      temperature: this.#opts.temperature,
+      bufferCharThreshold: this.#opts.bufferCharThreshold,
+      maxBufferDelayMs: this.#opts.maxBufferDelayMs,
+      timestampType: this.#opts.timestampType,
+      applyTextNormalization: this.#opts.textNormalization,
+    };
+
+    return this.#send(ws, { create: config, contextId: this.#contextId });
+  }
+
+  #sendText(ws: WebSocket, text: string): Promise<void> {
+    return this.#send(ws, {
+      send_text: { text },
+      contextId: this.#contextId,
+    });
+  }
+
+  #flushContext(ws: WebSocket): Promise<void> {
+    return this.#send(ws, {
+      flush_context: {},
+      contextId: this.#contextId,
+    });
+  }
+
+  #closeContext(ws: WebSocket): Promise<void> {
+    return this.#send(ws, {
+      close_context: {},
+      contextId: this.#contextId,
+    });
+  }
+}
diff --git a/plugins/inworld/tsconfig.json b/plugins/inworld/tsconfig.json
new file mode 100644
index 00000000..a1617574
--- /dev/null
+++ b/plugins/inworld/tsconfig.json
@@ -0,0 +1,17 @@
+{
+  "extends": "../../tsconfig.json",
+  "include": ["./src"],
+  "compilerOptions": {
+    // match output dir to input dir. e.g. dist/index instead of dist/src/index
+    "rootDir": "./src",
+    "declarationDir": "./dist",
+    "outDir": "./dist"
+  },
+  "typedocOptions": {
+    "name": "plugins/agents-plugin-inworld",
+    "entryPointStrategy": "resolve",
+    "readme": "none",
+    "entryPoints": ["src/index.ts"]
+  }
+}
+
diff --git a/plugins/inworld/tsup.config.ts b/plugins/inworld/tsup.config.ts
new file mode 100644
index 00000000..96bdaec8
--- /dev/null
+++ b/plugins/inworld/tsup.config.ts
@@ -0,0 +1,8 @@
+import { defineConfig } from 'tsup';
+
+import defaults from '../../tsup.config';
+
+export default defineConfig({
+  ...defaults,
+});
+
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 3a81f39b..39b53189 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -236,6 +236,9 @@ importers:
       '@livekit/agents-plugin-google':
         specifier: workspace:*
         version: link:../plugins/google
+      '@livekit/agents-plugin-inworld':
+        specifier: workspace:*
+        version: link:../plugins/inworld
       '@livekit/agents-plugin-livekit':
         specifier: workspace:*
         version: link:../plugins/livekit
@@ -499,6 +502,31 @@ importers:
         specifier: ^5.0.0
         version: 5.4.5
 
+  plugins/inworld:
+    dependencies:
+      ws:
+        specifier: ^8.16.0
+        version: 8.18.3
+    devDependencies:
+      '@livekit/agents':
+        specifier: workspace:*
+        version: link:../../agents
+      '@livekit/rtc-node':
+        specifier: ^0.13.12
+        version: 0.13.13
+      '@microsoft/api-extractor':
+        specifier: ^7.35.0
+        version: 7.43.7(@types/node@22.15.30)
+      '@types/ws':
+        specifier: ^8.5.10
+        version: 8.5.10
+      tsup:
+        specifier: ^8.3.5
+        version: 8.4.0(@microsoft/api-extractor@7.43.7(@types/node@22.15.30))(postcss@8.4.38)(tsx@4.20.4)(typescript@5.4.5)
+      typescript:
+        specifier: ^5.0.0
+        version: 5.4.5
+
   plugins/livekit:
     dependencies:
       '@huggingface/hub':
@@ -1437,92 +1465,78 @@ packages:
     resolution: {integrity: sha512-RXwd0CgG+uPRX5YYrkzKyalt2OJYRiJQ8ED/fi1tq9WQW2jsQIn0tqrlR5l5dr/rjqq6AHAxURhj2DVjyQWSOA==}
     cpu: [arm64]
     os: [linux]
-    libc: [glibc]
 
   '@img/sharp-libvips-linux-arm@1.2.0':
     resolution: {integrity: sha512-mWd2uWvDtL/nvIzThLq3fr2nnGfyr/XMXlq8ZJ9WMR6PXijHlC3ksp0IpuhK6bougvQrchUAfzRLnbsen0Cqvw==}
     cpu: [arm]
     os: [linux]
-    libc: [glibc]
 
   '@img/sharp-libvips-linux-ppc64@1.2.0':
     resolution: {integrity: sha512-Xod/7KaDDHkYu2phxxfeEPXfVXFKx70EAFZ0qyUdOjCcxbjqyJOEUpDe6RIyaunGxT34Anf9ue/wuWOqBW2WcQ==}
     cpu: [ppc64]
     os: [linux]
-    libc: [glibc]
 
   '@img/sharp-libvips-linux-s390x@1.2.0':
     resolution: {integrity: sha512-eMKfzDxLGT8mnmPJTNMcjfO33fLiTDsrMlUVcp6b96ETbnJmd4uvZxVJSKPQfS+odwfVaGifhsB07J1LynFehw==}
     cpu: [s390x]
     os: [linux]
-    libc: [glibc]
 
   '@img/sharp-libvips-linux-x64@1.2.0':
     resolution: {integrity: sha512-ZW3FPWIc7K1sH9E3nxIGB3y3dZkpJlMnkk7z5tu1nSkBoCgw2nSRTFHI5pB/3CQaJM0pdzMF3paf9ckKMSE9Tg==}
     cpu: [x64]
     os: [linux]
-    libc: [glibc]
 
   '@img/sharp-libvips-linuxmusl-arm64@1.2.0':
     resolution: {integrity: sha512-UG+LqQJbf5VJ8NWJ5Z3tdIe/HXjuIdo4JeVNADXBFuG7z9zjoegpzzGIyV5zQKi4zaJjnAd2+g2nna8TZvuW9Q==}
     cpu: [arm64]
     os: [linux]
-    libc: [musl]
 
   '@img/sharp-libvips-linuxmusl-x64@1.2.0':
     resolution: {integrity: sha512-SRYOLR7CXPgNze8akZwjoGBoN1ThNZoqpOgfnOxmWsklTGVfJiGJoC/Lod7aNMGA1jSsKWM1+HRX43OP6p9+6Q==}
     cpu: [x64]
     os: [linux]
-    libc: [musl]
 
   '@img/sharp-linux-arm64@0.34.3':
     resolution: {integrity: sha512-QdrKe3EvQrqwkDrtuTIjI0bu6YEJHTgEeqdzI3uWJOH6G1O8Nl1iEeVYRGdj1h5I21CqxSvQp1Yv7xeU3ZewbA==}
     engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
     cpu: [arm64]
     os: [linux]
-    libc: [glibc]
 
   '@img/sharp-linux-arm@0.34.3':
     resolution: {integrity: sha512-oBK9l+h6KBN0i3dC8rYntLiVfW8D8wH+NPNT3O/WBHeW0OQWCjfWksLUaPidsrDKpJgXp3G3/hkmhptAW0I3+A==}
     engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
     cpu: [arm]
     os: [linux]
-    libc: [glibc]
 
   '@img/sharp-linux-ppc64@0.34.3':
     resolution: {integrity: sha512-GLtbLQMCNC5nxuImPR2+RgrviwKwVql28FWZIW1zWruy6zLgA5/x2ZXk3mxj58X/tszVF69KK0Is83V8YgWhLA==}
     engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
     cpu: [ppc64]
     os: [linux]
-    libc: [glibc]
 
   '@img/sharp-linux-s390x@0.34.3':
     resolution: {integrity: sha512-3gahT+A6c4cdc2edhsLHmIOXMb17ltffJlxR0aC2VPZfwKoTGZec6u5GrFgdR7ciJSsHT27BD3TIuGcuRT0KmQ==}
     engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
     cpu: [s390x]
     os: [linux]
-    libc: [glibc]
 
   '@img/sharp-linux-x64@0.34.3':
     resolution: {integrity: sha512-8kYso8d806ypnSq3/Ly0QEw90V5ZoHh10yH0HnrzOCr6DKAPI6QVHvwleqMkVQ0m+fc7EH8ah0BB0QPuWY6zJQ==}
     engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
     cpu: [x64]
     os: [linux]
-    libc: [glibc]
 
   '@img/sharp-linuxmusl-arm64@0.34.3':
     resolution: {integrity: sha512-vAjbHDlr4izEiXM1OTggpCcPg9tn4YriK5vAjowJsHwdBIdx0fYRsURkxLG2RLm9gyBq66gwtWI8Gx0/ov+JKQ==}
     engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
     cpu: [arm64]
     os: [linux]
-    libc: [musl]
 
   '@img/sharp-linuxmusl-x64@0.34.3':
     resolution: {integrity: sha512-gCWUn9547K5bwvOn9l5XGAEjVTTRji4aPTqLzGXHvIr6bIDZKNTA34seMPgM0WmSf+RYBH411VavCejp3PkOeQ==}
     engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
     cpu: [x64]
     os: [linux]
-    libc: [musl]
 
   '@img/sharp-wasm32@0.34.3':
     resolution: {integrity: sha512-+CyRcpagHMGteySaWos8IbnXcHgfDn7pO2fiC2slJxvNq9gDipYBN42/RagzctVRKgxATmfqOSulgZv5e1RdMg==}
@@ -1647,14 +1661,12 @@ packages:
     engines: {node: '>= 10'}
     cpu: [arm64]
     os: [linux]
-    libc: [glibc]
 
   '@livekit/rtc-node-linux-x64-gnu@0.13.13':
     resolution: {integrity: sha512-B/SgbeBRobpA5LqmDEoBJHpRXePpoF4RO4F0zJf9BdkDhOR0j77p6hD0ZiOuPTRoBzUqukpsTszp+lZnHoNmiA==}
     engines: {node: '>= 10'}
     cpu: [x64]
     os: [linux]
-    libc: [glibc]
 
   '@livekit/rtc-node-win32-x64-msvc@0.13.13':
     resolution: {integrity: sha512-ygVYV4eHczs3QdaW/p0ADhhm7InUDhFaCYk8OzzIn056ZibZPXzvPizCThZqs8VsDniA01MraZF3qhZZb8IyRg==}
@@ -1981,121 +1993,101 @@ packages:
     resolution: {integrity: sha512-3reX2fUHqN7sffBNqmEyMQVj/CKhIHZd4y631duy0hZqI8Qoqf6lTtmAKvJFYa6bhU95B1D0WgzHkmTg33In0A==}
     cpu: [arm]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-arm-gnueabihf@4.40.0':
     resolution: {integrity: sha512-y/qUMOpJxBMy8xCXD++jeu8t7kzjlOCkoxxajL58G62PJGBZVl/Gwpm7JK9+YvlB701rcQTzjUZ1JgUoPTnoQA==}
     cpu: [arm]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-arm-musleabihf@4.17.2':
     resolution: {integrity: sha512-uSqpsp91mheRgw96xtyAGP9FW5ChctTFEoXP0r5FAzj/3ZRv3Uxjtc7taRQSaQM/q85KEKjKsZuiZM3GyUivRg==}
     cpu: [arm]
     os: [linux]
-    libc: [musl]
 
   '@rollup/rollup-linux-arm-musleabihf@4.40.0':
     resolution: {integrity: sha512-GoCsPibtVdJFPv/BOIvBKO/XmwZLwaNWdyD8TKlXuqp0veo2sHE+A/vpMQ5iSArRUz/uaoj4h5S6Pn0+PdhRjg==}
     cpu: [arm]
     os: [linux]
-    libc: [musl]
 
   '@rollup/rollup-linux-arm64-gnu@4.17.2':
     resolution: {integrity: sha512-EMMPHkiCRtE8Wdk3Qhtciq6BndLtstqZIroHiiGzB3C5LDJmIZcSzVtLRbwuXuUft1Cnv+9fxuDtDxz3k3EW2A==}
     cpu: [arm64]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-arm64-gnu@4.40.0':
     resolution: {integrity: sha512-L5ZLphTjjAD9leJzSLI7rr8fNqJMlGDKlazW2tX4IUF9P7R5TMQPElpH82Q7eNIDQnQlAyiNVfRPfP2vM5Avvg==}
     cpu: [arm64]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-arm64-musl@4.17.2':
     resolution: {integrity: sha512-NMPylUUZ1i0z/xJUIx6VUhISZDRT+uTWpBcjdv0/zkp7b/bQDF+NfnfdzuTiB1G6HTodgoFa93hp0O1xl+/UbA==}
     cpu: [arm64]
     os: [linux]
-    libc: [musl]
 
   '@rollup/rollup-linux-arm64-musl@4.40.0':
     resolution: {integrity: sha512-ATZvCRGCDtv1Y4gpDIXsS+wfFeFuLwVxyUBSLawjgXK2tRE6fnsQEkE4csQQYWlBlsFztRzCnBvWVfcae/1qxQ==}
     cpu: [arm64]
     os: [linux]
-    libc: [musl]
 
   '@rollup/rollup-linux-loongarch64-gnu@4.40.0':
     resolution: {integrity: sha512-wG9e2XtIhd++QugU5MD9i7OnpaVb08ji3P1y/hNbxrQ3sYEelKJOq1UJ5dXczeo6Hj2rfDEL5GdtkMSVLa/AOg==}
     cpu: [loong64]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-powerpc64le-gnu@4.17.2':
     resolution: {integrity: sha512-T19My13y8uYXPw/L/k0JYaX1fJKFT/PWdXiHr8mTbXWxjVF1t+8Xl31DgBBvEKclw+1b00Chg0hxE2O7bTG7GQ==}
     cpu: [ppc64]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-powerpc64le-gnu@4.40.0':
     resolution: {integrity: sha512-vgXfWmj0f3jAUvC7TZSU/m/cOE558ILWDzS7jBhiCAFpY2WEBn5jqgbqvmzlMjtp8KlLcBlXVD2mkTSEQE6Ixw==}
     cpu: [ppc64]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-riscv64-gnu@4.17.2':
     resolution: {integrity: sha512-BOaNfthf3X3fOWAB+IJ9kxTgPmMqPPH5f5k2DcCsRrBIbWnaJCgX2ll77dV1TdSy9SaXTR5iDXRL8n7AnoP5cg==}
     cpu: [riscv64]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-riscv64-gnu@4.40.0':
     resolution: {integrity: sha512-uJkYTugqtPZBS3Z136arevt/FsKTF/J9dEMTX/cwR7lsAW4bShzI2R0pJVw+hcBTWF4dxVckYh72Hk3/hWNKvA==}
     cpu: [riscv64]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-riscv64-musl@4.40.0':
     resolution: {integrity: sha512-rKmSj6EXQRnhSkE22+WvrqOqRtk733x3p5sWpZilhmjnkHkpeCgWsFFo0dGnUGeA+OZjRl3+VYq+HyCOEuwcxQ==}
     cpu: [riscv64]
     os: [linux]
-    libc: [musl]
 
   '@rollup/rollup-linux-s390x-gnu@4.17.2':
     resolution: {integrity: sha512-W0UP/x7bnn3xN2eYMql2T/+wpASLE5SjObXILTMPUBDB/Fg/FxC+gX4nvCfPBCbNhz51C+HcqQp2qQ4u25ok6g==}
     cpu: [s390x]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-s390x-gnu@4.40.0':
     resolution: {integrity: sha512-SpnYlAfKPOoVsQqmTFJ0usx0z84bzGOS9anAC0AZ3rdSo3snecihbhFTlJZ8XMwzqAcodjFU4+/SM311dqE5Sw==}
     cpu: [s390x]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-x64-gnu@4.17.2':
     resolution: {integrity: sha512-Hy7pLwByUOuyaFC6mAr7m+oMC+V7qyifzs/nW2OJfC8H4hbCzOX07Ov0VFk/zP3kBsELWNFi7rJtgbKYsav9QQ==}
     cpu: [x64]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-x64-gnu@4.40.0':
     resolution: {integrity: sha512-RcDGMtqF9EFN8i2RYN2W+64CdHruJ5rPqrlYw+cgM3uOVPSsnAQps7cpjXe9be/yDp8UC7VLoCoKC8J3Kn2FkQ==}
     cpu: [x64]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-x64-musl@4.17.2':
     resolution: {integrity: sha512-h1+yTWeYbRdAyJ/jMiVw0l6fOOm/0D1vNLui9iPuqgRGnXA0u21gAqOyB5iHjlM9MMfNOm9RHCQ7zLIzT0x11Q==}
     cpu: [x64]
     os: [linux]
-    libc: [musl]
 
   '@rollup/rollup-linux-x64-musl@4.40.0':
     resolution: {integrity: sha512-HZvjpiUmSNx5zFgwtQAV1GaGazT2RWvqeDi0hV+AtC8unqqDSsaFjPxfsO6qPtKRRg25SisACWnJ37Yio8ttaw==}
     cpu: [x64]
     os: [linux]
-    libc: [musl]
 
   '@rollup/rollup-win32-arm64-msvc@4.17.2':
     resolution: {integrity: sha512-tmdtXMfKAjy5+IQsVtDiCfqbynAQE/TQRpWdVataHmhMb9DCoJxp9vLcCBjEQWMiUYxO1QprH/HbY9ragCEFLA==}

From e09963f14beeb8cf605c4350846e940558631393 Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Fri, 28 Nov 2025 15:16:14 -0800
Subject: [PATCH 2/3] chore: lint fix and format

---
 examples/src/inworld_tts.ts  | 203 +++++++++++++++---------------
 plugins/inworld/src/index.ts |   3 +-
 plugins/inworld/src/tts.ts   | 237 +++++++++++++++++------------------
 turbo.json                   |   3 +-
 4 files changed, 223 insertions(+), 223 deletions(-)

diff --git a/examples/src/inworld_tts.ts b/examples/src/inworld_tts.ts
index faedf829..20960d04 100644
--- a/examples/src/inworld_tts.ts
+++ b/examples/src/inworld_tts.ts
@@ -2,111 +2,116 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 import {
-    type JobContext,
-    type JobProcess,
-    WorkerOptions,
-    cli,
-    defineAgent,
-    metrics,
-    voice,
-  } from '@livekit/agents';
-  import * as livekit from '@livekit/agents-plugin-livekit';
-  import * as inworld from '@livekit/agents-plugin-inworld';
-  import * as silero from '@livekit/agents-plugin-silero';
-  import { BackgroundVoiceCancellation } from '@livekit/noise-cancellation-node';
-  import { fileURLToPath } from 'node:url';
-  
-  export default defineAgent({
-    prewarm: async (proc: JobProcess) => {
-      proc.userData.vad = await silero.VAD.load();
-    },
-    entry: async (ctx: JobContext) => {
-      const agent = new voice.Agent({
-        instructions:
-          "You are a helpful assistant, you can hear the user's message and respond to it in 1-2 short sentences."
-      });
+  type JobContext,
+  type JobProcess,
+  WorkerOptions,
+  cli,
+  defineAgent,
+  metrics,
+  voice,
+} from '@livekit/agents';
+import * as inworld from '@livekit/agents-plugin-inworld';
+import * as livekit from '@livekit/agents-plugin-livekit';
+import * as silero from '@livekit/agents-plugin-silero';
+import { BackgroundVoiceCancellation } from '@livekit/noise-cancellation-node';
+import { fileURLToPath } from 'node:url';
 
-      // Create TTS instance
-      const tts = new inworld.TTS({
-        timestampType: 'WORD',
-        voice: 'Hades',
-        model: 'inworld-tts-1',
-        encoding: 'LINEAR16',
-        textNormalization: 'ON',
-        bitRate: 64000,
-        sampleRate: 24000,
-        speakingRate: 1.0,
-        temperature: 1.1,
-        bufferCharThreshold: 100,
-        maxBufferDelayMs: 3000
-      });
+export default defineAgent({
+  prewarm: async (proc: JobProcess) => {
+    proc.userData.vad = await silero.VAD.load();
+  },
+  entry: async (ctx: JobContext) => {
+    const agent = new voice.Agent({
+      instructions:
+        "You are a helpful assistant, you can hear the user's message and respond to it in 1-2 short sentences.",
+    });
 
-      // List available voices (non-blocking)
-      tts.listVoices().then((voices: inworld.Voice[]) => {
+    // Create TTS instance
+    const tts = new inworld.TTS({
+      timestampType: 'WORD',
+      voice: 'Hades',
+      model: 'inworld-tts-1',
+      encoding: 'LINEAR16',
+      textNormalization: 'ON',
+      bitRate: 64000,
+      sampleRate: 24000,
+      speakingRate: 1.0,
+      temperature: 1.1,
+      bufferCharThreshold: 100,
+      maxBufferDelayMs: 3000,
+    });
+
+    // List available voices (non-blocking)
+    tts
+      .listVoices()
+      .then((voices: inworld.Voice[]) => {
         console.log(`[Inworld TTS] ${voices.length} voices available in this workspace`);
         if (voices.length > 0) {
-          console.log('[Inworld TTS] Logging information for first voice:', JSON.stringify(voices[0], null, 2));
+          console.log(
+            '[Inworld TTS] Logging information for first voice:',
+            JSON.stringify(voices[0], null, 2),
+          );
         }
-      }).catch((err: Error) => {
+      })
+      .catch((err: Error) => {
         console.error('[Inworld TTS] Failed to list voices:', err);
       });
-  
-      const session = new voice.AgentSession({
-        // Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
-        // See all available models at https://docs.livekit.io/agents/models/stt/
-        stt: 'assemblyai/universal-streaming:en',
-        // A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
-        // See all available models at https://docs.livekit.io/agents/models/llm/
-        llm: 'openai/gpt-4.1-mini',
-        // Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
-        // See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
-        tts,
-        // VAD and turn detection are used to determine when the user is speaking and when the agent should respond
-        // See more at https://docs.livekit.io/agents/build/turns
-        vad: ctx.proc.userData.vad! as silero.VAD,
-        turnDetection: new livekit.turnDetector.MultilingualModel(),
-        // to use realtime model, replace the stt, llm, tts and vad with the following
-        // llm: new openai.realtime.RealtimeModel(),
-        voiceOptions: {
-          // allow the LLM to generate a response while waiting for the end of turn
-          preemptiveGeneration: true,
-        },
-      });
-  
-      // timestamp handling for inworld TTS
-      session.tts!.on('alignment' as any, (data: any) => {
-        if (data.wordAlignment) {
-          const { words, starts, ends } = data.wordAlignment;
-          for (let i = 0; i < words.length; i++) {
-            console.log(`[Inworld TTS] Word: "${words[i]}", Start: ${starts[i]}, End: ${ends[i]}`);
-          }
+
+    const session = new voice.AgentSession({
+      // Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
+      // See all available models at https://docs.livekit.io/agents/models/stt/
+      stt: 'assemblyai/universal-streaming:en',
+      // A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
+      // See all available models at https://docs.livekit.io/agents/models/llm/
+      llm: 'openai/gpt-4.1-mini',
+      // Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
+      // See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
+      tts,
+      // VAD and turn detection are used to determine when the user is speaking and when the agent should respond
+      // See more at https://docs.livekit.io/agents/build/turns
+      vad: ctx.proc.userData.vad! as silero.VAD,
+      turnDetection: new livekit.turnDetector.MultilingualModel(),
+      // to use realtime model, replace the stt, llm, tts and vad with the following
+      // llm: new openai.realtime.RealtimeModel(),
+      voiceOptions: {
+        // allow the LLM to generate a response while waiting for the end of turn
+        preemptiveGeneration: true,
+      },
+    });
+
+    // timestamp handling for inworld TTS
+    session.tts!.on('alignment' as any, (data: any) => {
+      if (data.wordAlignment) {
+        const { words, starts, ends } = data.wordAlignment;
+        for (let i = 0; i < words.length; i++) {
+          console.log(`[Inworld TTS] Word: "${words[i]}", Start: ${starts[i]}, End: ${ends[i]}`);
         }
-        if (data.characterAlignment) {
-          const { chars, starts, ends } = data.characterAlignment;
-          for (let i = 0; i < chars.length; i++) {
-            console.log(`[Inworld TTS] Char: "${chars[i]}", Start: ${starts[i]}, End: ${ends[i]}`);
-          }
+      }
+      if (data.characterAlignment) {
+        const { chars, starts, ends } = data.characterAlignment;
+        for (let i = 0; i < chars.length; i++) {
+          console.log(`[Inworld TTS] Char: "${chars[i]}", Start: ${starts[i]}, End: ${ends[i]}`);
         }
-      });
-  
-      const usageCollector = new metrics.UsageCollector();
-  
-      session.on(voice.AgentSessionEventTypes.MetricsCollected, (ev) => {
-        metrics.logMetrics(ev.metrics);
-        usageCollector.collect(ev.metrics);
-      });
-  
-      await session.start({
-        agent,
-        room: ctx.room,
-        inputOptions: {
-          noiseCancellation: BackgroundVoiceCancellation(),
-        },
-      });
-  
-      session.say('Hello, how can I help you today?');
-    },
-  });
-  
-  cli.runApp(new WorkerOptions({ agent: fileURLToPath(import.meta.url) }));
-  
\ No newline at end of file
+      }
+    });
+
+    const usageCollector = new metrics.UsageCollector();
+
+    session.on(voice.AgentSessionEventTypes.MetricsCollected, (ev) => {
+      metrics.logMetrics(ev.metrics);
+      usageCollector.collect(ev.metrics);
+    });
+
+    await session.start({
+      agent,
+      room: ctx.room,
+      inputOptions: {
+        noiseCancellation: BackgroundVoiceCancellation(),
+      },
+    });
+
+    session.say('Hello, how can I help you today?');
+  },
+});
+
+cli.runApp(new WorkerOptions({ agent: fileURLToPath(import.meta.url) }));
diff --git a/plugins/inworld/src/index.ts b/plugins/inworld/src/index.ts
index 932950e2..5d0b0b5b 100644
--- a/plugins/inworld/src/index.ts
+++ b/plugins/inworld/src/index.ts
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 import { Plugin } from '@livekit/agents';
-import { TTS } from './tts.js';
+import './tts.js';
 
 export * from './tts.js';
 
@@ -17,4 +17,3 @@ class InworldPlugin extends Plugin {
 }
 
 Plugin.registerPlugin(new InworldPlugin());
-
diff --git a/plugins/inworld/src/tts.ts b/plugins/inworld/src/tts.ts
index ee0f7f78..fa2b56c2 100644
--- a/plugins/inworld/src/tts.ts
+++ b/plugins/inworld/src/tts.ts
@@ -1,19 +1,13 @@
 // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
-import {
-  AudioByteStream,
-  log,
-  shortuuid,
-  tts,
-  tokenize,
-} from '@livekit/agents';
+import { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';
 import { type RawData, WebSocket } from 'ws';
 
 const DEFAULT_BIT_RATE = 64000;
 const DEFAULT_ENCODING = 'LINEAR16';
 const DEFAULT_MODEL = 'inworld-tts-1';
-const DEFAULT_SAMPLE_RATE = 24000; 
+const DEFAULT_SAMPLE_RATE = 24000;
 const DEFAULT_URL = 'https://api.inworld.ai/';
 const DEFAULT_WS_URL = 'wss://api.inworld.ai/';
 const DEFAULT_VOICE = 'Ashley';
@@ -236,7 +230,7 @@ class WSConnectionPool {
               this.#listeners.get(contextId)!(json);
             }
           } else if (json.error) {
-             this.#logger.warn({ error: json.error }, 'Inworld received error message');
+            this.#logger.warn({ error: json.error }, 'Inworld received error message');
           }
         } catch (e) {
           this.#logger.warn({ error: e }, 'Failed to parse Inworld WebSocket message');
@@ -332,18 +326,18 @@ export class TTS extends tts.TTS {
   updateOptions(opts: Partial<TTSOptions>) {
     this.#opts = { ...this.#opts, ...opts };
     if (opts.apiKey) {
-        this.#authorization = `Basic ${opts.apiKey}`;
-        // If API key changes, we might need to reset the pool or create a new one?
-        // For now, assume WS url doesn't change or we just create new pool if needed.
-        // But existing pool has hardcoded auth in constructor.
-        // Re-creating pool is safer.
-        this.#pool.close();
-        this.#pool = new WSConnectionPool(this.#opts.wsURL, this.#authorization);
+      this.#authorization = `Basic ${opts.apiKey}`;
+      // If API key changes, we might need to reset the pool or create a new one?
+      // For now, assume WS url doesn't change or we just create new pool if needed.
+      // But existing pool has hardcoded auth in constructor.
+      // Re-creating pool is safer.
+      this.#pool.close();
+      this.#pool = new WSConnectionPool(this.#opts.wsURL, this.#authorization);
     }
   }
 
   async close() {
-      this.#pool.close();
+    this.#pool.close();
   }
 }
 
@@ -378,7 +372,7 @@ class ChunkedStream extends tts.ChunkedStream {
     };
 
     const url = new URL('tts/v1/voice:stream', this.#opts.baseURL);
-    
+
     const response = await fetch(url.toString(), {
       method: 'POST',
       headers: {
@@ -399,7 +393,7 @@ class ChunkedStream extends tts.ChunkedStream {
     const reader = response.body.getReader();
     const decoder = new TextDecoder();
     let buffer = '';
-    
+
     const requestId = shortuuid();
     const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);
 
@@ -412,40 +406,40 @@ class ChunkedStream extends tts.ChunkedStream {
       buffer = lines.pop() || '';
 
       for (const line of lines) {
-            if (line.trim()) {
-              try {
-                const data = JSON.parse(line);
-                if (data.result) {
-                   if (data.result.audioContent) {
-                     const audio = Buffer.from(data.result.audioContent, 'base64');
-                     
-                     let pcmData = audio;
-                     if (audio.length > 44 && audio.subarray(0, 4).toString() === 'RIFF') {
-                         // This is a WAV header, skip 44 bytes
-                         pcmData = audio.subarray(44);
-                     }
-                     
-                     for (const frame of bstream.write(
-                       pcmData.buffer.slice(pcmData.byteOffset, pcmData.byteOffset + pcmData.byteLength),
-                     )) {
-                       this.queue.put({
-                         requestId,
-                         segmentId: requestId,
-                         frame,
-                         final: false,
-                       });
-                     }
-                   }
-                } else if (data.error) {
-                   throw new Error(data.error.message);
+        if (line.trim()) {
+          try {
+            const data = JSON.parse(line);
+            if (data.result) {
+              if (data.result.audioContent) {
+                const audio = Buffer.from(data.result.audioContent, 'base64');
+
+                let pcmData = audio;
+                if (audio.length > 44 && audio.subarray(0, 4).toString() === 'RIFF') {
+                  // This is a WAV header, skip 44 bytes
+                  pcmData = audio.subarray(44);
+                }
+
+                for (const frame of bstream.write(
+                  pcmData.buffer.slice(pcmData.byteOffset, pcmData.byteOffset + pcmData.byteLength),
+                )) {
+                  this.queue.put({
+                    requestId,
+                    segmentId: requestId,
+                    frame,
+                    final: false,
+                  });
                 }
-              } catch (e) {
-                log().warn({ error: e, line }, 'Failed to parse Inworld chunk');
               }
+            } else if (data.error) {
+              throw new Error(data.error.message);
             }
+          } catch (e) {
+            log().warn({ error: e, line }, 'Failed to parse Inworld chunk');
+          }
+        }
       }
     }
-    
+
     // Flush remaining frames
     for (const frame of bstream.flush()) {
       this.queue.put({
@@ -479,8 +473,8 @@ class SynthesizeStream extends tts.SynthesizeStream {
     let resolveProcessing: () => void;
     let rejectProcessing: (err: Error) => void;
     const processing = new Promise<void>((resolve, reject) => {
-        resolveProcessing = resolve;
-        rejectProcessing = reject;
+      resolveProcessing = resolve;
+      rejectProcessing = reject;
     });
 
     const handleMessage = (msg: InworldMessage) => {
@@ -488,87 +482,89 @@ class SynthesizeStream extends tts.SynthesizeStream {
       if (!result) return;
 
       if (result.contextCreated) {
-          // context created
+        // context created
       } else if (result.contextClosed) {
-          resolveProcessing();
+        resolveProcessing();
       } else if (result.audioChunk) {
-          if (result.audioChunk.timestampInfo) {
-              // Log word timestamps if available
-              const tsInfo = result.audioChunk.timestampInfo;
-              if (tsInfo.wordAlignment) {
-                  const words = tsInfo.wordAlignment.words || [];
-                  const starts = tsInfo.wordAlignment.wordStartTimeSeconds || [];
-                  const ends = tsInfo.wordAlignment.wordEndTimeSeconds || [];
-                  
-                  for (let i = 0; i < words.length; i++) {
-                      // console.log(`[Inworld TTS] Word: "${words[i]}", Start: ${starts[i]}, End: ${ends[i]}`);
-                  }
-                  
-                  (this.#tts as any).emit('alignment', {
-                      requestId: this.#contextId,
-                      segmentId: this.#contextId,
-                      wordAlignment: { words, starts, ends }
-                  });
-              }
-              
-              if (tsInfo.characterAlignment) {
-                  const chars = tsInfo.characterAlignment.characters || [];
-                  const starts = tsInfo.characterAlignment.characterStartTimeSeconds || [];
-                  const ends = tsInfo.characterAlignment.characterEndTimeSeconds || [];
-                  
-                  (this.#tts as any).emit('alignment', {
-                      requestId: this.#contextId,
-                      segmentId: this.#contextId,
-                      characterAlignment: { chars, starts, ends }
-                  });
-              }
+        if (result.audioChunk.timestampInfo) {
+          // Log word timestamps if available
+          const tsInfo = result.audioChunk.timestampInfo;
+          if (tsInfo.wordAlignment) {
+            const words = tsInfo.wordAlignment.words || [];
+            const starts = tsInfo.wordAlignment.wordStartTimeSeconds || [];
+            const ends = tsInfo.wordAlignment.wordEndTimeSeconds || [];
+
+            for (let i = 0; i < words.length; i++) {
+              // console.log(`[Inworld TTS] Word: "${words[i]}", Start: ${starts[i]}, End: ${ends[i]}`);
+            }
+
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            (this.#tts as any).emit('alignment', {
+              requestId: this.#contextId,
+              segmentId: this.#contextId,
+              wordAlignment: { words, starts, ends },
+            });
           }
 
-          if (result.audioChunk.audioContent) {
-              // Some servers may return either nested audioContent or top-level
-              const b64Content = result.audioChunk.audioContent || result.audioContent;
-              if (b64Content) {
-                  const audio = Buffer.from(b64Content, 'base64');
-                  let pcmData = audio;
-                  if (audio.length > 44 && audio.subarray(0, 4).toString() === 'RIFF') {
-                       // This is a WAV header, skip 44 bytes
-                       pcmData = audio.subarray(44);
-                  }
-                  for (const frame of bstream.write(
-                    pcmData.buffer.slice(pcmData.byteOffset, pcmData.byteOffset + pcmData.byteLength),
-                  )) {
-                      this.queue.put({
-                          requestId: this.#contextId,
-                          segmentId: this.#contextId,
-                          frame,
-                          final: false,
-                      });
-                  }
-              }
+          if (tsInfo.characterAlignment) {
+            const chars = tsInfo.characterAlignment.characters || [];
+            const starts = tsInfo.characterAlignment.characterStartTimeSeconds || [];
+            const ends = tsInfo.characterAlignment.characterEndTimeSeconds || [];
+
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            (this.#tts as any).emit('alignment', {
+              requestId: this.#contextId,
+              segmentId: this.#contextId,
+              characterAlignment: { chars, starts, ends },
+            });
+          }
+        }
+
+        if (result.audioChunk.audioContent) {
+          // Some servers may return either nested audioContent or top-level
+          const b64Content = result.audioChunk.audioContent || result.audioContent;
+          if (b64Content) {
+            const audio = Buffer.from(b64Content, 'base64');
+            let pcmData = audio;
+            if (audio.length > 44 && audio.subarray(0, 4).toString() === 'RIFF') {
+              // This is a WAV header, skip 44 bytes
+              pcmData = audio.subarray(44);
+            }
+            for (const frame of bstream.write(
+              pcmData.buffer.slice(pcmData.byteOffset, pcmData.byteOffset + pcmData.byteLength),
+            )) {
+              this.queue.put({
+                requestId: this.#contextId,
+                segmentId: this.#contextId,
+                frame,
+                final: false,
+              });
+            }
           }
+        }
       } else if (result.status && result.status.code !== 0) {
-          const error = new Error(`Inworld stream error: ${result.status.message}`);
-          rejectProcessing(error);
+        const error = new Error(`Inworld stream error: ${result.status.message}`);
+        rejectProcessing(error);
       }
     };
 
     this.#tts.pool.registerListener(this.#contextId, handleMessage);
 
     const sendLoop = async () => {
-        for await (const ev of tokenizerStream) {
-            await this.#sendText(ws, ev.token);
-        }
+      for await (const ev of tokenizerStream) {
+        await this.#sendText(ws, ev.token);
+      }
     };
     const sendPromise = sendLoop();
 
     try {
       await this.#createContext(ws);
-      
+
       for await (const text of this.input) {
         if (text === tts.SynthesizeStream.FLUSH_SENTINEL) {
-            tokenizerStream.flush();
+          tokenizerStream.flush();
         } else {
-            tokenizerStream.pushText(text);
+          tokenizerStream.pushText(text);
         }
       }
       tokenizerStream.endInput();
@@ -577,20 +573,19 @@ class SynthesizeStream extends tts.SynthesizeStream {
       await this.#flushContext(ws);
       await this.#closeContext(ws);
       await processing;
-      
+
       // Flush remaining frames
       for (const frame of bstream.flush()) {
-          this.queue.put({
-              requestId: this.#contextId,
-              segmentId: this.#contextId,
-              frame,
-              final: false,
-          });
+        this.queue.put({
+          requestId: this.#contextId,
+          segmentId: this.#contextId,
+          frame,
+          final: false,
+        });
       }
-
     } catch (e) {
-        log().error({ error: e }, 'Error in SynthesizeStream run');
-        throw e;
+      log().error({ error: e }, 'Error in SynthesizeStream run');
+      throw e;
     } finally {
       this.#tts.pool.unregisterListener(this.#contextId);
     }
diff --git a/turbo.json b/turbo.json
index 6d1d09b5..6c420a36 100644
--- a/turbo.json
+++ b/turbo.json
@@ -47,7 +47,8 @@
     "ANAM_AVATAR_ID",
     "RIME_API_KEY",
     "LK_OPENAI_DEBUG",
-    "OVHCLOUD_API_KEY"
+    "OVHCLOUD_API_KEY",
+    "INWORLD_API_KEY"
   ],
   "pipeline": {
     "build": {

From 687075c2264801651985633cc8fe1b27ec7eb269 Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Fri, 28 Nov 2025 15:23:23 -0800
Subject: [PATCH 3/3] clean up code and comments

---
 examples/src/inworld_tts.ts |  4 ++--
 plugins/inworld/src/tts.ts  | 15 +--------------
 2 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/examples/src/inworld_tts.ts b/examples/src/inworld_tts.ts
index 20960d04..bb365c4c 100644
--- a/examples/src/inworld_tts.ts
+++ b/examples/src/inworld_tts.ts
@@ -41,7 +41,7 @@ export default defineAgent({
       maxBufferDelayMs: 3000,
     });
 
-    // List available voices (non-blocking)
+    // List available voices
     tts
       .listVoices()
       .then((voices: inworld.Voice[]) => {
@@ -79,7 +79,7 @@ export default defineAgent({
       },
     });
 
-    // timestamp handling for inworld TTS
+    // timestamp handling (if enabled)
     session.tts!.on('alignment' as any, (data: any) => {
       if (data.wordAlignment) {
         const { words, starts, ends } = data.wordAlignment;
diff --git a/plugins/inworld/src/tts.ts b/plugins/inworld/src/tts.ts
index fa2b56c2..c924b64f 100644
--- a/plugins/inworld/src/tts.ts
+++ b/plugins/inworld/src/tts.ts
@@ -192,7 +192,6 @@ class WSConnectionPool {
   #attemptConnection(): Promise<WebSocket> {
     return new Promise((resolve, reject) => {
       const wsUrl = new URL('tts/v1/voice:streamBidirectional', this.#url);
-      // Ensure protocol is wss
       if (wsUrl.protocol === 'https:') wsUrl.protocol = 'wss:';
       else if (wsUrl.protocol === 'http:') wsUrl.protocol = 'ws:';
 
@@ -224,7 +223,6 @@ class WSConnectionPool {
           const json = JSON.parse(data.toString()) as InworldMessage;
           const result = json.result;
           if (result) {
-            // Try to find contextId in result or top level
             const contextId = result.contextId || json.contextId;
             if (contextId && this.#listeners.has(contextId)) {
               this.#listeners.get(contextId)!(json);
@@ -288,7 +286,7 @@ export class TTS extends tts.TTS {
   }
 
   /**
-   * List all available voices.
+   * List all available voices in the workspace associated with the API key.
    * @param language - Optional ISO 639-1 language code to filter voices (e.g., 'en', 'es', 'fr')
    */
   async listVoices(language?: string): Promise<Voice[]> {
@@ -327,10 +325,6 @@ export class TTS extends tts.TTS {
     this.#opts = { ...this.#opts, ...opts };
     if (opts.apiKey) {
       this.#authorization = `Basic ${opts.apiKey}`;
-      // If API key changes, we might need to reset the pool or create a new one?
-      // For now, assume WS url doesn't change or we just create new pool if needed.
-      // But existing pool has hardcoded auth in constructor.
-      // Re-creating pool is safer.
       this.#pool.close();
       this.#pool = new WSConnectionPool(this.#opts.wsURL, this.#authorization);
     }
@@ -482,22 +476,16 @@ class SynthesizeStream extends tts.SynthesizeStream {
       if (!result) return;
 
       if (result.contextCreated) {
-        // context created
       } else if (result.contextClosed) {
         resolveProcessing();
       } else if (result.audioChunk) {
         if (result.audioChunk.timestampInfo) {
-          // Log word timestamps if available
           const tsInfo = result.audioChunk.timestampInfo;
           if (tsInfo.wordAlignment) {
             const words = tsInfo.wordAlignment.words || [];
             const starts = tsInfo.wordAlignment.wordStartTimeSeconds || [];
             const ends = tsInfo.wordAlignment.wordEndTimeSeconds || [];
 
-            for (let i = 0; i < words.length; i++) {
-              // console.log(`[Inworld TTS] Word: "${words[i]}", Start: ${starts[i]}, End: ${ends[i]}`);
-            }
-
             // eslint-disable-next-line @typescript-eslint/no-explicit-any
             (this.#tts as any).emit('alignment', {
               requestId: this.#contextId,
@@ -521,7 +509,6 @@ class SynthesizeStream extends tts.SynthesizeStream {
         }
 
         if (result.audioChunk.audioContent) {
-          // Some servers may return either nested audioContent or top-level
           const b64Content = result.audioChunk.audioContent || result.audioContent;
           if (b64Content) {
             const audio = Buffer.from(b64Content, 'base64');