-
Notifications
You must be signed in to change notification settings - Fork 383
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
toolkit: add text to speech synthesis (#801)
* Add synthesizer
* Format assistants web
* Add message_id to StreamEnd
* Add icons
* Add synthesize message client endpoint
* Add useSynthesize hook
* Minor refactoring
* Add integration tests
* Add unit tests
* Refactor unit tests
* Update templates
* Update back-end to use google cloud
* Format tests
* Add tts to experimental_features
* Regenerate client web
* Add experimental features hook
* Update front-end to use google cloud
* Add loading spinner
* Refactor useSynthesizer hook
* Fix typo and remove is ascii check
* Add api key validation
* Update exception text
* Fix typecheck
- Loading branch information
1 parent
f428c55
commit 7e2e993
Showing
30 changed files
with
603 additions
and
190 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,4 +42,6 @@ auth: | |
oidc: | ||
client_id: | ||
client_secret: | ||
well_known_endpoint: | ||
well_known_endpoint: | ||
google_cloud: | ||
api_key: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
from google.cloud.texttospeech import ( | ||
AudioConfig, | ||
AudioEncoding, | ||
SynthesisInput, | ||
TextToSpeechClient, | ||
VoiceSelectionParams, | ||
) | ||
from googleapiclient.discovery import build | ||
|
||
from backend.config import Settings | ||
|
||
|
||
def synthesize(text: str) -> bytes:
    """Synthesize speech from the input text.

    The language of ``text`` is detected first and used as the voice's
    language code; the audio is requested with MP3 encoding.

    Args:
        text: The input text to be synthesized into speech.

    Returns:
        The audio content returned by the text-to-speech service.

    Raises:
        ValueError: If the Google Cloud API key from the settings is missing
            or empty.
    """
    # Fail fast on a missing key before any client is created.
    api_key = _validate_google_cloud_api_key()

    language = detect_language(text)

    # A new client is created per call; using it as a context manager closes
    # its underlying transport on exit instead of leaving it for the garbage
    # collector.
    with TextToSpeechClient(client_options={"api_key": api_key}) as client:
        response = client.synthesize_speech(
            input=SynthesisInput(text=text),
            voice=VoiceSelectionParams(language_code=language),
            audio_config=AudioConfig(audio_encoding=AudioEncoding.MP3),
        )

    return response.audio_content
|
||
|
||
def detect_language(text: str) -> str:
    """Detect the language of the given text.

    Sends ``text`` to the ``detections`` endpoint of the "translate" v2
    discovery API and returns the language reported for it.

    Args:
        text: The text for which the language needs to be detected.

    Returns:
        The language code of the detected language (e.g., 'en', 'es').

    Raises:
        ValueError: If the Google Cloud API key from the settings is missing
            or empty.
    """
    api_key = _validate_google_cloud_api_key()

    # The discovery resource holds HTTP connections; the context manager
    # closes them once the request has completed.
    with build("translate", "v2", developerKey=api_key) as client:
        response = client.detections().list(q=text).execute()

    # Take the first candidate of the first detection entry.
    return response["detections"][0][0]["language"]
|
||
|
||
def _validate_google_cloud_api_key() -> str:
    """Return the Google Cloud API key from the settings after checking it.

    Returns:
        The API key read from ``Settings().google_cloud.api_key``.

    Raises:
        ValueError: If the ``google_cloud`` settings section is falsy, or if
            its ``api_key`` is falsy.
    """
    section = Settings().google_cloud
    if not section:
        raise ValueError("google_cloud in secrets.yaml is missing.")

    api_key = section.api_key
    if api_key:
        return api_key

    raise ValueError("google_cloud.api_key in secrets.yaml is missing.")
Oops, something went wrong.