1818import json
1919import os
2020import time
21- from dataclasses import dataclass
21+ from dataclasses import asdict , dataclass
2222
2323import aiohttp
2424
@@ -56,22 +56,47 @@ def is_end_token(token: dict) -> bool:
5656 return token .get ("text" ) in (END_TOKEN , FINALIZED_TOKEN )
5757
5858
@dataclass
class ContextGeneralItem:
    """One key/value entry, used as an element of ``ContextObject.general``."""

    key: str
    value: str
63+
64+
@dataclass
class ContextTranslationTerm:
    """One source/target pair, used as an element of ``ContextObject.translation_terms``."""

    source: str
    target: str
69+
70+
@dataclass
class ContextObject:
    """Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.

    Every field is optional and defaults to ``None``.

    Learn more about context in the documentation:
    https://soniox.com/docs/stt/concepts/context
    """

    # Key/value items.
    general: list[ContextGeneralItem] | None = None
    # Free-form text.
    text: str | None = None
    # Plain list of term strings.
    terms: list[str] | None = None
    # Source/target term pairs.
    translation_terms: list[ContextTranslationTerm] | None = None
83+
84+
@dataclass
class STTOptions:
    """Configuration options for Soniox Speech-to-Text service."""

    model: str | None = "stt-rt-preview"

    language_hints: list[str] | None = None
    # Either a structured ContextObject or a plain string.
    context: ContextObject | str | None = None

    num_channels: int = 1
    sample_rate: int = 16000

    enable_speaker_diarization: bool = False
    enable_language_identification: bool = True

    client_reference_id: str | None = None
76101
77102
@@ -176,6 +201,10 @@ async def _connect_ws(self):
176201 # If VAD was passed, disable endpoint detection, otherwise enable it.
177202 enable_endpoint_detection = not self ._stt ._vad_stream
178203
204+ context = self ._stt ._params .context
205+ if isinstance (context , ContextObject ):
206+ context = asdict (context )
207+
179208 # Create initial config object.
180209 config = {
181210 "api_key" : self ._stt ._api_key ,
@@ -185,9 +214,8 @@ async def _connect_ws(self):
185214 "enable_endpoint_detection" : enable_endpoint_detection ,
186215 "sample_rate" : self ._stt ._params .sample_rate ,
187216 "language_hints" : self ._stt ._params .language_hints ,
188- "context" : self ._stt ._params .context ,
189- "enable_non_final_tokens" : self ._stt ._params .enable_non_final_tokens ,
190- "max_non_final_tokens_duration_ms" : self ._stt ._params .max_non_final_tokens_duration_ms ,
217+ "context" : context ,
218+ "enable_speaker_diarization" : self ._stt ._params .enable_speaker_diarization ,
191219 "enable_language_identification" : self ._stt ._params .enable_language_identification ,
192220 "client_reference_id" : self ._stt ._params .client_reference_id ,
193221 }
0 commit comments