diff --git a/google/genai/live.py b/google/genai/live.py index a158a45f7..0998be47f 100644 --- a/google/genai/live.py +++ b/google/genai/live.py @@ -1025,6 +1025,49 @@ def _t_tool_response( ) from e +def _t_speech_config( + origin: Union[types.SpeechConfigUnionDict, Any], +) -> Optional[types.SpeechConfig]: + if not origin: + return None + if isinstance(origin, types.SpeechConfig): + return origin + if isinstance(origin, str): + # There is no way to know if the string is a voice name or a language code. + raise ValueError( + f'Unsupported speechConfig type: {type(origin)}. There is no way to' + ' know if the string is a voice name or a language code.' + ) + if isinstance(origin, dict): + speech_config = types.SpeechConfig() + if ( + 'voice_config' in origin + and origin['voice_config'] is not None + and 'prebuilt_voice_config' in origin['voice_config'] + and origin['voice_config']['prebuilt_voice_config'] is not None + and 'voice_name' in origin['voice_config']['prebuilt_voice_config'] + ): + speech_config.voice_config = types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name=origin['voice_config']['prebuilt_voice_config'].get( + 'voice_name' + ) + ) + ) + if 'language_code' in origin and origin['language_code'] is not None: + speech_config.language_code = origin['language_code'] + if ( + speech_config.voice_config is None + and speech_config.language_code is None + ): + raise ValueError( + 'Unsupported speechConfig type: {type(origin)}. At least one of' + ' voice_config or language_code must be set.' + ) + return speech_config + raise ValueError(f'Unsupported speechConfig type: {type(origin)}') + + class AsyncLive(_api_module.BaseModule): """AsyncLive. The live module is experimental.""" @@ -1056,18 +1099,14 @@ def _LiveSetup_to_mldev( if getv(to_object, ['generationConfig']) is not None: to_object['generationConfig']['speechConfig'] = _SpeechConfig_to_mldev( self._api_client, - t.t_speech_config( - self._api_client, getv(config, ['speech_config']) - ), + _t_speech_config(getv(config, ['speech_config'])), to_object, ) else: to_object['generationConfig'] = { 'speechConfig': _SpeechConfig_to_mldev( self._api_client, - t.t_speech_config( - self._api_client, getv(config, ['speech_config']) - ), + _t_speech_config(getv(config, ['speech_config'])), to_object, ) } @@ -1169,18 +1208,14 @@ def _LiveSetup_to_vertex( if getv(to_object, ['generationConfig']) is not None: to_object['generationConfig']['speechConfig'] = _SpeechConfig_to_vertex( self._api_client, - t.t_speech_config( - self._api_client, getv(config, ['speech_config']) - ), + _t_speech_config(getv(config, ['speech_config'])), to_object, ) else: to_object['generationConfig'] = { 'speechConfig': _SpeechConfig_to_vertex( self._api_client, - t.t_speech_config( - self._api_client, getv(config, ['speech_config']) - ), + _t_speech_config(getv(config, ['speech_config'])), to_object, ) } diff --git a/google/genai/models.py b/google/genai/models.py index 92f5bfbc0..302b4c727 100644 --- a/google/genai/models.py +++ b/google/genai/models.py @@ -393,6 +393,9 @@ def _SpeechConfig_to_mldev( ), ) + if getv(from_object, ['language_code']) is not None: + setv(to_object, ['languageCode'], getv(from_object, ['language_code'])) + return to_object @@ -1490,6 +1493,9 @@ def _SpeechConfig_to_vertex( ), ) + if getv(from_object, ['language_code']) is not None: + setv(to_object, ['languageCode'], getv(from_object, ['language_code'])) + return to_object diff --git a/google/genai/tests/live/test_live.py b/google/genai/tests/live/test_live.py index cdda5216b..441bc3520 100644 --- a/google/genai/tests/live/test_live.py +++ b/google/genai/tests/live/test_live.py @@ -33,6 +33,7 @@ from ... import live from ... import types + def exception_if_mldev(vertexai, exception_type: type[Exception]): if vertexai: return contextlib.nullcontext() @@ -490,7 +491,8 @@ async def test_bidi_setup_to_api_speech_config(vertexai): 'speechConfig': { 'voiceConfig': { 'prebuiltVoiceConfig': {'voiceName': 'en-default'} - } + }, + 'languageCode': 'en-US', }, 'temperature': 0.7, 'topP': 0.8, @@ -512,14 +514,15 @@ async def test_bidi_setup_to_api_speech_config(vertexai): expected_result['setup']['model'] = 'projects/test_project/locations/us-central1/publishers/google/models/test_model' expected_result['setup']['generationConfig']['responseModalities'] = ['AUDIO'] else: - expected_result['setup']['model'] = 'models/test_model' + expected_result['setup']['model'] = 'models/test_model' # Test for mldev, config is a dict config_dict = { 'speech_config': { 'voice_config': { 'prebuilt_voice_config': {'voice_name': 'en-default'} - } + }, + 'language_code': 'en-US', }, 'temperature': 0.7, 'top_p': 0.8, @@ -542,7 +545,8 @@ async def test_bidi_setup_to_api_speech_config(vertexai): prebuilt_voice_config=types.PrebuiltVoiceConfig( voice_name='en-default' ) - ) + ), + language_code='en-US', ), temperature=0.7, top_p=0.8, diff --git a/google/genai/types.py b/google/genai/types.py index 438ed8963..92aa8b631 100644 --- a/google/genai/types.py +++ b/google/genai/types.py @@ -1541,6 +1541,12 @@ class SpeechConfig(_common.BaseModel): description="""The configuration for the speaker to use. """, ) + language_code: Optional[str] = Field( + default=None, + description="""Language code (ISO 639. e.g. en-US) for the speech synthesization. + Only available for Live API. + """, + ) class SpeechConfigDict(TypedDict, total=False): @@ -1550,6 +1556,11 @@ class SpeechConfigDict(TypedDict, total=False): """The configuration for the speaker to use. """ + language_code: Optional[str] + """Language code (ISO 639. e.g. en-US) for the speech synthesization. + Only available for Live API. + """ + SpeechConfigOrDict = Union[SpeechConfig, SpeechConfigDict]