-
Notifications
You must be signed in to change notification settings - Fork 366
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
polly_tts_extension: add support for Amazon Polly. (#127)
* bedrock_llm_extension: add support for Amazon Bedrock Foundation Models * bedrock_llm_ext: Code cleanup * polly_tts_extension: add support for Amazon Polly. * typo fix * delete outdated code files * clear code --------- Co-authored-by: Chen188 <hidden>
- Loading branch information
Showing
15 changed files
with
524 additions
and
467 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,2 @@ | ||
pillow==10.4.0 | ||
# openai==1.35.13 | ||
# requests==2.32.3 | ||
boto3==1.34.143 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
## Amazon Polly TTS Extension | ||
|
||
### Configurations | ||
|
||
You can configure this extension by providing the following environment variables: | ||
|
||
| Env | Required | Default | Notes | | ||
| -- | -- | -- | -- | | ||
| AWS_TTS_REGION | No | us-east-1 | The Region of the Amazon Polly service you want to use. | | ||
| AWS_TTS_ACCESS_KEY_ID | No | - | Access Key of your IAM User, make sure you've set proper permissions to [synthesize speech](https://docs.aws.amazon.com/polly/latest/dg/security_iam_id-based-policy-examples.html#example-managed-policy-service-admin). Will use default credentials provider if not provided. Check [document](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). | | ||
| AWS_TTS_SECRET_ACCESS_KEY | No | - | Secret Key of your IAM User, make sure you've set proper permissions to [synthesize speech](https://docs.aws.amazon.com/polly/latest/dg/security_iam_id-based-policy-examples.html#example-managed-policy-service-admin). Will use default credentials provider if not provided. Check [document](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Import the sibling ``main`` module for its import-time side effects
# (presumably the addon registration it performs -- confirm against main.py).
from . import main

# Startup marker written to stdout when the package is imported.
print("polly_tts_python extension loaded")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import logging | ||
|
||
# Shared logging setup for the extension: one named logger at INFO level that
# writes through a single stream handler using the format below.
formatter = logging.Formatter(
    "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s"
)

# Handler that emits formatted records to the default stream.
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)

# Module-level logger imported by the rest of the package.
logger = logging.getLogger("polly_tts_python")
logger.setLevel(logging.INFO)
logger.addHandler(console_handler)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
from rte_runtime_python import ( | ||
Addon, | ||
Extension, | ||
register_addon_as_extension, | ||
Rte, | ||
Cmd, | ||
PcmFrame, | ||
RTE_PCM_FRAME_DATA_FMT, | ||
Data, | ||
StatusCode, | ||
CmdResult, | ||
MetadataInfo, | ||
) | ||
|
||
import queue | ||
import threading | ||
from datetime import datetime | ||
import traceback | ||
from contextlib import closing | ||
|
||
from .log import logger | ||
from .polly_wrapper import PollyWrapper, PollyConfig | ||
|
||
# Property keys accepted by the extension. Every one of them is optional.
PROPERTY_REGION = "region"  # Optional
PROPERTY_ACCESS_KEY = "access_key"  # Optional
PROPERTY_SECRET_KEY = "secret_key"  # Optional
PROPERTY_ENGINE = "engine"  # Optional
PROPERTY_VOICE = "voice"  # Optional
PROPERTY_SAMPLE_RATE = "sample_rate"  # Optional
PROPERTY_LANG_CODE = "lang_code"  # Optional
|
||
|
||
class PollyTTSExtension(Extension):
    """Text-to-speech extension that converts incoming text into PCM frames.

    Text received in ``on_data`` is queued and consumed by a worker thread
    (``async_polly_handler``), which calls ``PollyWrapper.synthesize`` and
    sends the resulting audio out as fixed-size PCM frames. A ``flush``
    command drops pending text and interrupts synthesis of older requests.
    """

    def __init__(self, name: str):
        super().__init__(name)

        # Timestamp of the most recent flush; compared against each request's
        # enqueue time in need_interrupt().
        self.outdateTs = datetime.now()
        self.stopped = False
        self.thread = None
        # Holds (text, enqueue_time) tuples; None is the shutdown sentinel.
        self.queue = queue.Queue()
        # Bytes per outgoing frame; computed in on_start from the sample rate.
        self.frame_size = None

        self.bytes_per_sample = 2
        self.number_of_channels = 1

    def on_init(
        self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo
    ) -> None:
        """Acknowledge initialization to the runtime."""
        logger.info("PollyTTSExtension on_init")
        rte.on_init_done(manifest, property)

    def on_start(self, rte: Rte) -> None:
        """Build the Polly configuration, create the wrapper and start the worker."""
        logger.info("PollyTTSExtension on_start")

        polly_config = PollyConfig.default_config()

        # NOTE(review): every property is read with get_property_string; if
        # the runtime rejects string reads of non-string properties, a
        # numeric sample_rate would silently fall back to the default here --
        # confirm against the manifest and runtime behavior.
        optional_params = (
            PROPERTY_REGION,
            PROPERTY_ENGINE,
            PROPERTY_VOICE,
            PROPERTY_SAMPLE_RATE,
            PROPERTY_LANG_CODE,
            PROPERTY_ACCESS_KEY,
            PROPERTY_SECRET_KEY,
        )
        for optional_param in optional_params:
            try:
                value = rte.get_property_string(optional_param).strip()
                if value:
                    setattr(polly_config, optional_param, value)
            except Exception as err:
                # Never write credential values to the log, and never let the
                # fallback lookup itself raise from inside the handler.
                if optional_param in (PROPERTY_ACCESS_KEY, PROPERTY_SECRET_KEY):
                    default = "***"
                else:
                    default = getattr(polly_config, optional_param, None)
                logger.debug(
                    "GetProperty optional %s failed, err: %s. Using default value: %s",
                    optional_param,
                    err,
                    default,
                )

        self.polly = PollyWrapper(polly_config)
        # One frame carries 1/100 s (10 ms) of audio.
        self.frame_size = int(
            int(polly_config.sample_rate)
            * self.number_of_channels
            * self.bytes_per_sample
            / 100
        )

        self.thread = threading.Thread(target=self.async_polly_handler, args=[rte])
        self.thread.start()
        rte.on_start_done()

    def on_stop(self, rte: Rte) -> None:
        """Stop the worker thread and acknowledge the stop to the runtime."""
        logger.info("PollyTTSExtension on_stop")

        self.stopped = True
        # Drop pending work first, then enqueue the sentinel so it is not
        # discarded by the flush.
        self.flush()
        self.queue.put(None)
        # The thread only exists if on_start got far enough to create it.
        if self.thread is not None:
            self.thread.join()
        rte.on_stop_done()

    def on_deinit(self, rte: Rte) -> None:
        """Acknowledge de-initialization to the runtime."""
        logger.info("PollyTTSExtension on_deinit")
        rte.on_deinit_done()

    def need_interrupt(self, ts: datetime) -> bool:
        """Return True when ``ts`` is more than one second older than the last flush."""
        return (self.outdateTs - ts).total_seconds() > 1

    def __get_frame(self, data: bytes) -> PcmFrame:
        """Wrap ``data`` in a PCM frame of ``self.frame_size`` bytes.

        A chunk shorter than the frame size is zero-padded at the end.
        """
        sample_rate = int(self.polly.config.sample_rate)

        f = PcmFrame.create("pcm_frame")
        f.set_sample_rate(sample_rate)
        # Use the same values frame_size was derived from in on_start.
        f.set_bytes_per_sample(self.bytes_per_sample)
        f.set_number_of_channels(self.number_of_channels)

        f.set_data_fmt(RTE_PCM_FRAME_DATA_FMT.RTE_PCM_FRAME_DATA_FMT_INTERLEAVE)
        f.set_samples_per_channel(sample_rate // 100)
        f.alloc_buf(self.frame_size)
        buff = f.lock_buf()
        if len(data) < self.frame_size:
            buff[:] = bytes(self.frame_size)  # fill with 0
        buff[:len(data)] = data
        f.unlock_buf(buff)
        return f

    def async_polly_handler(self, rte: Rte):
        """Worker loop: synthesize each queued text and send it out as PCM frames.

        Exits when ``self.stopped`` is set or a ``None`` sentinel is dequeued.
        Errors for a single request are logged and the loop continues.
        """
        while not self.stopped:
            value = self.queue.get()
            if value is None:
                logger.warning("async_polly_handler: exit due to None value got.")
                break
            inputText, ts = value
            if not inputText:
                logger.warning("async_polly_handler: empty input detected.")
                continue
            try:
                audio_stream, _visemes = self.polly.synthesize(inputText)
                with closing(audio_stream) as stream:
                    for chunk in stream.iter_chunks(chunk_size=self.frame_size):
                        if self.need_interrupt(ts):
                            logger.debug("async_polly_handler: got interrupt cmd, stop sending pcm frame.")
                            break

                        rte.send_pcm_frame(self.__get_frame(chunk))
            except Exception as e:
                # logger.exception already appends the traceback.
                logger.exception(e)

    def flush(self):
        """Discard all queued requests, then enqueue an empty request.

        The empty request wakes a worker that is blocked in ``queue.get()``.
        """
        logger.info("PollyTTSExtension flush")
        # Drain without blocking: checking empty() and then calling a blocking
        # get() can hang if the worker takes the last item in between.
        while True:
            try:
                self.queue.get_nowait()
            except queue.Empty:
                break
        self.queue.put(("", datetime.now()))

    def on_data(self, rte: Rte, data: Data) -> None:
        """Queue the ``text`` property of ``data`` for synthesis; ignore empty text."""
        logger.info("PollyTTSExtension on_data")
        inputText = data.get_property_string("text")
        if not inputText:
            logger.info("ignore empty text")
            return

        is_end = data.get_property_bool("end_of_segment")

        logger.info("on data %s %d", inputText, is_end)
        self.queue.put((inputText, datetime.now()))

    def on_cmd(self, rte: Rte, cmd: Cmd) -> None:
        """Handle an incoming command and always reply with an OK result.

        ``flush`` records the flush time, clears the queue and forwards a new
        ``flush`` command downstream; any other command is only logged.
        """
        logger.info("PollyTTSExtension on_cmd")
        cmd_json = cmd.to_json()
        # Pass the JSON as a logging argument so "%s" is actually substituted.
        logger.info("PollyTTSExtension on_cmd json: %s", cmd_json)

        cmdName = cmd.get_name()
        if cmdName == "flush":
            self.outdateTs = datetime.now()
            self.flush()
            cmd_out = Cmd.create("flush")
            rte.send_cmd(cmd_out, lambda rte, result: print("PollyTTSExtension send_cmd done"))
        else:
            logger.info("unknown cmd %s", cmdName)

        cmd_result = CmdResult.create(StatusCode.OK)
        cmd_result.set_property_string("detail", "success")
        rte.return_result(cmd_result, cmd)
|
||
@register_addon_as_extension("polly_tts")
class PollyTTSExtensionAddon(Addon):
    """Addon registered under the name ``polly_tts`` that creates PollyTTSExtension instances."""

    def on_init(self, rte: Rte, manifest, property) -> None:
        """Log the call and report initialization as done."""
        logger.info("PollyTTSExtensionAddon on_init")
        rte.on_init_done(manifest, property)

    def on_create_instance(self, rte: Rte, addon_name: str, context) -> None:
        """Create a new extension named ``addon_name`` and hand it to the runtime."""
        logger.info("on_create_instance")
        extension = PollyTTSExtension(addon_name)
        rte.on_create_instance_done(extension, context)

    def on_deinit(self, rte: Rte) -> None:
        """Log the call and report de-initialization as done."""
        logger.info("PollyTTSExtensionAddon on_deinit")
        rte.on_deinit_done()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
{ | ||
"type": "extension", | ||
"name": "polly_tts", | ||
"version": "0.1.0", | ||
"language": "python", | ||
"dependencies": [ | ||
{ | ||
"type": "system", | ||
"name": "rte_runtime_python", | ||
"version": "0.2.0" | ||
} | ||
], | ||
"api": { | ||
"property": { | ||
"region": { | ||
"type": "string" | ||
}, | ||
"access_key": { | ||
"type": "string" | ||
}, | ||
"secret_key": { | ||
"type": "string" | ||
}, | ||
"engine": { | ||
"type": "string" | ||
}, | ||
"voice": { | ||
"type": "string" | ||
}, | ||
"sample_rate": { | ||
"type": "int64" | ||
}, | ||
"lang_code": { | ||
"type": "string" | ||
} | ||
}, | ||
"data_in": [ | ||
{ | ||
"name": "text_data", | ||
"property": { | ||
"text": { | ||
"type": "string" | ||
} | ||
} | ||
} | ||
], | ||
"cmd_in": [ | ||
{ | ||
"name": "flush" | ||
} | ||
], | ||
"cmd_out": [ | ||
{ | ||
"name": "flush" | ||
} | ||
], | ||
"pcm_frame_out": [ | ||
{ | ||
"name": "pcm_frame" | ||
} | ||
] | ||
} | ||
} |
Oops, something went wrong.