Skip to content

Commit 8225a79

Browse files
matejmarinko-soniox authored and akshaym1shra committed
Update Soniox STT parameters (livekit#3670)
1 parent f659173 commit 8225a79

File tree

2 files changed: 45 additions and 10 deletions.

2 files changed

+45
-10
lines changed

livekit-plugins/livekit-plugins-soniox/livekit/plugins/soniox/__init__.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,17 @@
1717
See https://docs.livekit.io/agents/integrations/stt/soniox/ for more information.
1818
"""
1919

20-
from .stt import STT, STTOptions
20+
from .stt import STT, ContextGeneralItem, ContextObject, ContextTranslationTerm, STTOptions
2121
from .version import __version__
2222

23-
__all__ = ["STT", "STTOptions", "__version__"]
23+
__all__ = [
24+
"STT",
25+
"STTOptions",
26+
"ContextObject",
27+
"ContextGeneralItem",
28+
"ContextTranslationTerm",
29+
"__version__",
30+
]
2431

2532

2633
from livekit.agents import Plugin

livekit-plugins/livekit-plugins-soniox/livekit/plugins/soniox/stt.py

Lines changed: 36 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
import json
1919
import os
2020
import time
21-
from dataclasses import dataclass
21+
from dataclasses import asdict, dataclass
2222

2323
import aiohttp
2424

@@ -56,22 +56,47 @@ def is_end_token(token: dict) -> bool:
5656
return token.get("text") in (END_TOKEN, FINALIZED_TOKEN)
5757

5858

59+
@dataclass
60+
class ContextGeneralItem:
61+
key: str
62+
value: str
63+
64+
65+
@dataclass
66+
class ContextTranslationTerm:
67+
source: str
68+
target: str
69+
70+
71+
@dataclass
72+
class ContextObject:
73+
"""Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.
74+
75+
Learn more about context in the documentation:
76+
https://soniox.com/docs/stt/concepts/context
77+
"""
78+
79+
general: list[ContextGeneralItem] | None = None
80+
text: str | None = None
81+
terms: list[str] | None = None
82+
translation_terms: list[ContextTranslationTerm] | None = None
83+
84+
5985
@dataclass
6086
class STTOptions:
6187
"""Configuration options for Soniox Speech-to-Text service."""
6288

6389
model: str | None = "stt-rt-preview"
90+
6491
language_hints: list[str] | None = None
65-
context: str | None = None
92+
context: ContextObject | str | None = None
6693

6794
num_channels: int = 1
6895
sample_rate: int = 16000
6996

97+
enable_speaker_diarization: bool = False
7098
enable_language_identification: bool = True
7199

72-
enable_non_final_tokens: bool = True
73-
max_non_final_tokens_duration_ms: int | None = None
74-
75100
client_reference_id: str | None = None
76101

77102

@@ -176,6 +201,10 @@ async def _connect_ws(self):
176201
# If VAD was passed, disable endpoint detection, otherwise enable it.
177202
enable_endpoint_detection = not self._stt._vad_stream
178203

204+
context = self._stt._params.context
205+
if isinstance(context, ContextObject):
206+
context = asdict(context)
207+
179208
# Create initial config object.
180209
config = {
181210
"api_key": self._stt._api_key,
@@ -185,9 +214,8 @@ async def _connect_ws(self):
185214
"enable_endpoint_detection": enable_endpoint_detection,
186215
"sample_rate": self._stt._params.sample_rate,
187216
"language_hints": self._stt._params.language_hints,
188-
"context": self._stt._params.context,
189-
"enable_non_final_tokens": self._stt._params.enable_non_final_tokens,
190-
"max_non_final_tokens_duration_ms": self._stt._params.max_non_final_tokens_duration_ms,
217+
"context": context,
218+
"enable_speaker_diarization": self._stt._params.enable_speaker_diarization,
191219
"enable_language_identification": self._stt._params.enable_language_identification,
192220
"client_reference_id": self._stt._params.client_reference_id,
193221
}

0 commit comments

Comments
 (0)