Skip to content

Commit

Permalink
polly_tts_extension: add support for Amazon Polly. (#127)
Browse files Browse the repository at this point in the history
* bedrock_llm_extension: add support for Amazon Bedrock Foundation Models

* bedrock_llm_ext: Code cleanup

* polly_tts_extension: add support for Amazon Polly.

* typo fix

* delete outdated code files

* clear code

---------

Co-authored-by: Chen188 <hidden>
  • Loading branch information
Chen188 authored Jul 30, 2024
1 parent 65b5a15 commit 09ca830
Show file tree
Hide file tree
Showing 15 changed files with 524 additions and 467 deletions.
64 changes: 17 additions & 47 deletions agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,53 +78,23 @@ def on_start(self, rte: Rte) -> None:
# Prepare configuration
bedrock_llm_config = BedrockLLMConfig.default_config()

try:
region = rte.get_property_string(PROPERTY_REGION)
if region:
bedrock_llm_config.region = region
except Exception as err:
logger.debug(f"GetProperty optional {PROPERTY_REGION} failed, err: {err}. Using default value: {bedrock_llm_config.region}")
return

try:
access_key = rte.get_property_string(PROPERTY_ACCESS_KEY)
bedrock_llm_config.access_key = access_key
except Exception as err:
logger.error(f"GetProperty optional {PROPERTY_ACCESS_KEY} failed, err: {err}. Using default value: {bedrock_llm_config.access_key}")
return

try:
secret_key = rte.get_property_string(PROPERTY_SECRET_KEY)
bedrock_llm_config.secret_key = secret_key
except Exception as err:
logger.error(f"GetProperty optional {PROPERTY_SECRET_KEY} failed, err: {err}. Using default value: {bedrock_llm_config.secret_key}")
return

try:
model = rte.get_property_string(PROPERTY_MODEL)
if model:
bedrock_llm_config.model = model
except Exception as err:
logger.debug(f"GetProperty optional {PROPERTY_MODEL} error: {err}. Using default value: {bedrock_llm_config.model}")

try:
prompt = rte.get_property_string(PROPERTY_PROMPT)
if prompt:
bedrock_llm_config.prompt = prompt
except Exception as err:
logger.debug(f"GetProperty optional {PROPERTY_PROMPT} error: {err}. Using default value: {bedrock_llm_config.prompt}")

try:
temperature = rte.get_property_float(PROPERTY_TEMPERATURE)
bedrock_llm_config.temperature = float(temperature)
except Exception as err:
logger.debug(f"GetProperty optional {PROPERTY_TEMPERATURE} failed, err: {err}. Using default value: {bedrock_llm_config.temperature}")
for optional_str_param in [
PROPERTY_REGION, PROPERTY_ACCESS_KEY, PROPERTY_SECRET_KEY,
PROPERTY_MODEL, PROPERTY_PROMPT]:
try:
value = rte.get_property_string(optional_str_param).strip()
if value:
bedrock_llm_config.__setattr__(optional_str_param, value)
except Exception as err:
logger.debug(f"GetProperty optional {optional_str_param} failed, err: {err}. Using default value: {bedrock_llm_config.__getattribute__(optional_str_param)}")

try:
top_p = rte.get_property_float(PROPERTY_TOP_P)
bedrock_llm_config.top_p = float(top_p)
except Exception as err:
logger.debug(f"GetProperty optional {PROPERTY_TOP_P} failed, err: {err}. Using default value: {bedrock_llm_config.top_p}")
for optional_float_param in [PROPERTY_TEMPERATURE, PROPERTY_TOP_P]:
try:
value = rte.get_property_float(optional_float_param)
if value:
bedrock_llm_config.__setattr__(optional_float_param, value)
except Exception as err:
logger.debug(f"GetProperty optional {optional_float_param} failed, err: {err}. Using default value: {bedrock_llm_config.__getattribute__(optional_float_param)}")

try:
max_tokens = rte.get_property_int(PROPERTY_MAX_TOKENS)
Expand All @@ -150,7 +120,7 @@ def on_start(self, rte: Rte) -> None:
self.bedrock_llm = BedrockLLM(bedrock_llm_config)
logger.info(f"newBedrockLLM succeed with max_tokens: {bedrock_llm_config.max_tokens}, model: {bedrock_llm_config.model}")
except Exception as err:
logger.info(f"newBedrockLLM failed, err: {err}")
logger.exception(f"newBedrockLLM failed, err: {err}")

# Send greeting if available
if greeting:
Expand Down
2 changes: 0 additions & 2 deletions agents/addon/extension/bedrock_llm_python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
pillow==10.4.0
# openai==1.35.13
# requests==2.32.3
boto3==1.34.143
11 changes: 11 additions & 0 deletions agents/addon/extension/polly_tts_python/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
## Amazon Polly TTS Extension

### Configurations

You can configure this extension by providing the following environment variables:

| Env | Required | Default | Notes |
| -- | -- | -- | -- |
| AWS_TTS_REGION | No | us-east-1 | The Region of the Amazon Polly service you want to use. |
| AWS_TTS_ACCESS_KEY_ID | No | - | Access Key of your IAM User, make sure you've set proper permissions to [synthesize speech](https://docs.aws.amazon.com/polly/latest/dg/security_iam_id-based-policy-examples.html#example-managed-policy-service-admin). Will use default credentials provider if not provided. Check [document](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). |
| AWS_TTS_SECRET_ACCESS_KEY | No | - | Secret Key of your IAM User, make sure you've set proper permissions to [synthesize speech](https://docs.aws.amazon.com/polly/latest/dg/security_iam_id-based-policy-examples.html#example-managed-policy-service-admin). Will use default credentials provider if not provided. Check [document](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). |
3 changes: 3 additions & 0 deletions agents/addon/extension/polly_tts_python/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Importing `main` executes its module-level code, which includes the
# @register_addon_as_extension("polly_tts") decorator applied to the addon class.
from . import main

# Import-time marker written to stdout once the package has been loaded.
print("polly_tts_python extension loaded")
13 changes: 13 additions & 0 deletions agents/addon/extension/polly_tts_python/log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import logging

# Package logger for the Polly TTS extension; sibling modules import `logger`
# from here. Records below INFO are dropped by the logger itself.
logger = logging.getLogger("polly_tts_python")
logger.setLevel(logging.INFO)

# Stream handler (no explicit stream given) that carries the package's log format:
# timestamp, logger name, level, process id, source location and message.
console_handler = logging.StreamHandler()
formatter = logging.Formatter(
    fmt="%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s"
)
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)
183 changes: 183 additions & 0 deletions agents/addon/extension/polly_tts_python/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
from rte_runtime_python import (
Addon,
Extension,
register_addon_as_extension,
Rte,
Cmd,
PcmFrame,
RTE_PCM_FRAME_DATA_FMT,
Data,
StatusCode,
CmdResult,
MetadataInfo,
)

import queue
import threading
from datetime import datetime
import traceback
from contextlib import closing

from .log import logger
from .polly_wrapper import PollyWrapper, PollyConfig

# Names of the extension properties looked up in on_start. Every one of them is
# optional: when a lookup fails or yields an empty string, the value from
# PollyConfig.default_config() is kept.
PROPERTY_REGION = "region"
PROPERTY_ACCESS_KEY = "access_key"
PROPERTY_SECRET_KEY = "secret_key"
PROPERTY_ENGINE = "engine"
PROPERTY_VOICE = "voice"
PROPERTY_SAMPLE_RATE = "sample_rate"
PROPERTY_LANG_CODE = "lang_code"


class PollyTTSExtension(Extension):
    """Extension that converts incoming text into PCM frames using PollyWrapper.

    Text received in ``on_data`` is put on an internal queue together with its
    arrival time. A worker thread (``async_polly_handler``) takes items from
    the queue, calls ``PollyWrapper.synthesize`` and sends the resulting audio
    to the runtime in frames that each hold 1/100 s of samples. A ``flush``
    command empties the queue and marks audio that is currently being sent as
    outdated.
    """

    def __init__(self, name: str):
        super().__init__(name)

        # Time of the most recent "flush" command; compared against the
        # enqueue time of each text in need_interrupt().
        self.outdateTs = datetime.now()
        self.stopped = False
        self.thread = None
        self.polly = None
        self.queue = queue.Queue()
        # Bytes per outgoing PCM frame; computed in on_start from the sample rate.
        self.frame_size = None

        self.bytes_per_sample = 2
        self.number_of_channels = 1

    def on_init(
        self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo
    ) -> None:
        """Log the call and acknowledge initialization to the runtime."""
        logger.info("PollyTTSExtension on_init")
        rte.on_init_done(manifest, property)

    def on_start(self, rte: Rte) -> None:
        """Build the Polly configuration, create the wrapper and start the worker thread.

        Each optional property is read as a string; a failed lookup or an
        empty value leaves the default from ``PollyConfig.default_config()``.
        """
        logger.info("PollyTTSExtension on_start")

        polly_config = PollyConfig.default_config()

        # NOTE(review): the manifest declares "sample_rate" as int64 while it is
        # read here with get_property_string; if that lookup raises, the default
        # sample rate is silently kept -- confirm against the runtime API.
        for optional_param in [PROPERTY_REGION, PROPERTY_ENGINE, PROPERTY_VOICE,
                               PROPERTY_SAMPLE_RATE, PROPERTY_LANG_CODE,
                               PROPERTY_ACCESS_KEY, PROPERTY_SECRET_KEY]:
            try:
                value = rte.get_property_string(optional_param).strip()
                if value:
                    setattr(polly_config, optional_param, value)
            except Exception as err:
                logger.debug(f"GetProperty optional {optional_param} failed, err: {err}. Using default value: {getattr(polly_config, optional_param)}")

        self.polly = PollyWrapper(polly_config)
        # One frame carries 1/100 s of audio: sample_rate / 100 samples per channel.
        self.frame_size = int(int(polly_config.sample_rate) * self.number_of_channels * self.bytes_per_sample / 100)

        self.thread = threading.Thread(target=self.async_polly_handler, args=[rte])
        self.thread.start()
        rte.on_start_done()

    def on_stop(self, rte: Rte) -> None:
        """Stop the worker thread, wait for it to finish and acknowledge the stop."""
        logger.info("PollyTTSExtension on_stop")

        self.stopped = True
        # Drop pending text first, then enqueue the sentinel so that flush()
        # cannot discard it before the worker sees it.
        self.flush()
        self.queue.put(None)
        # The thread only exists once on_start has run.
        if self.thread is not None:
            self.thread.join()
        rte.on_stop_done()

    def on_deinit(self, rte: Rte) -> None:
        """Log the call and acknowledge de-initialization to the runtime."""
        logger.info("PollyTTSExtension on_deinit")
        rte.on_deinit_done()

    def need_interrupt(self, ts: datetime) -> bool:
        """Return True when the last flush happened more than one second after ``ts``."""
        return (self.outdateTs - ts).total_seconds() > 1

    def __get_frame(self, data: bytes) -> PcmFrame:
        """Wrap ``data`` in a PcmFrame of ``self.frame_size`` bytes.

        A chunk shorter than the frame size is padded with zero bytes.
        """
        sample_rate = int(self.polly.config.sample_rate)

        f = PcmFrame.create("pcm_frame")
        f.set_sample_rate(sample_rate)
        # Use the same values that on_start used to compute frame_size.
        f.set_bytes_per_sample(self.bytes_per_sample)
        f.set_number_of_channels(self.number_of_channels)

        f.set_data_fmt(RTE_PCM_FRAME_DATA_FMT.RTE_PCM_FRAME_DATA_FMT_INTERLEAVE)
        f.set_samples_per_channel(sample_rate // 100)
        f.alloc_buf(self.frame_size)
        buff = f.lock_buf()
        if len(data) < self.frame_size:
            buff[:] = bytes(self.frame_size)  # fill with 0
        buff[:len(data)] = data
        f.unlock_buf(buff)
        return f

    def async_polly_handler(self, rte: Rte):
        """Worker loop: synthesize queued text and send it as PCM frames.

        Runs until ``self.stopped`` is set or a ``None`` item is received.
        Errors raised while synthesizing or sending one text are logged and
        the loop continues with the next item.
        """
        while not self.stopped:
            value = self.queue.get()
            if value is None:
                logger.warning("async_polly_handler: exit due to None value got.")
                break
            inputText, ts = value
            if len(inputText) == 0:
                logger.warning("async_polly_handler: empty input detected.")
                continue
            try:
                audio_stream, _visemes = self.polly.synthesize(inputText)
                with closing(audio_stream) as stream:
                    for chunk in stream.iter_chunks(chunk_size=self.frame_size):
                        # Also stop on shutdown so on_stop's join() does not
                        # have to wait for the rest of the utterance.
                        if self.stopped or self.need_interrupt(ts):
                            logger.debug("async_polly_handler: got interrupt cmd, stop sending pcm frame.")
                            break

                        f = self.__get_frame(chunk)
                        rte.send_pcm_frame(f)
            except Exception as e:
                # logger.exception already appends the active traceback.
                logger.exception(e)

    def flush(self):
        """Discard all queued items, then enqueue one empty-text item."""
        logger.info("PollyTTSExtension flush")
        # get_nowait() cannot block if the worker thread takes the last item
        # between an emptiness check and the get.
        while True:
            try:
                self.queue.get_nowait()
            except queue.Empty:
                break
        # The worker skips empty text; presumably this item only serves to wake
        # a worker blocked on get() -- TODO confirm intent.
        self.queue.put(("", datetime.now()))

    def on_data(self, rte: Rte, data: Data) -> None:
        """Queue the "text" property of ``data`` for synthesis; empty text is ignored."""
        logger.info("PollyTTSExtension on_data")
        inputText = data.get_property_string("text")
        if not inputText:
            logger.info("ignore empty text")
            return

        # "end_of_segment" is only logged, so a failed lookup must not drop the text.
        try:
            is_end = data.get_property_bool("end_of_segment")
        except Exception as err:
            logger.debug("on_data: end_of_segment unavailable, err: %s", err)
            is_end = False

        logger.info("on data %s %d", inputText, is_end)
        self.queue.put((inputText, datetime.now()))

    def on_cmd(self, rte: Rte, cmd: Cmd) -> None:
        """Handle an incoming command and always answer it with an OK result.

        "flush" records the flush time, empties the text queue and sends a
        "flush" command onward; any other command is only logged.
        """
        logger.info("PollyTTSExtension on_cmd")
        cmd_json = cmd.to_json()
        # Pass the JSON as a logging argument rather than concatenating it
        # onto the format string.
        logger.info("PollyTTSExtension on_cmd json: %s", cmd_json)

        cmdName = cmd.get_name()
        if cmdName == "flush":
            self.outdateTs = datetime.now()
            self.flush()
            cmd_out = Cmd.create("flush")
            rte.send_cmd(cmd_out, lambda rte, result: logger.info("PollyTTSExtension send_cmd done"))
        else:
            logger.info("unknown cmd %s", cmdName)

        cmd_result = CmdResult.create(StatusCode.OK)
        cmd_result.set_property_string("detail", "success")
        rte.return_result(cmd_result, cmd)

@register_addon_as_extension("polly_tts")
class PollyTTSExtensionAddon(Addon):
    """Addon decorated with the name "polly_tts" that creates PollyTTSExtension instances."""

    def on_init(self, rte: Rte, manifest, property) -> None:
        """Log the call and report initialization as done."""
        logger.info("PollyTTSExtensionAddon on_init")
        rte.on_init_done(manifest, property)

    def on_create_instance(self, rte: Rte, addon_name: str, context) -> None:
        """Create a PollyTTSExtension named ``addon_name`` and hand it to the runtime."""
        logger.info("on_create_instance")
        extension = PollyTTSExtension(addon_name)
        rte.on_create_instance_done(extension, context)

    def on_deinit(self, rte: Rte) -> None:
        """Log the call and report de-initialization as done."""
        logger.info("PollyTTSExtensionAddon on_deinit")
        rte.on_deinit_done()
63 changes: 63 additions & 0 deletions agents/addon/extension/polly_tts_python/manifest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
{
"type": "extension",
"name": "polly_tts",
"version": "0.1.0",
"language": "python",
"dependencies": [
{
"type": "system",
"name": "rte_runtime_python",
"version": "0.2.0"
}
],
"api": {
"property": {
"region": {
"type": "string"
},
"access_key": {
"type": "string"
},
"secret_key": {
"type": "string"
},
"engine": {
"type": "string"
},
"voice": {
"type": "string"
},
"sample_rate": {
"type": "int64"
},
"lang_code": {
"type": "string"
}
},
"data_in": [
{
"name": "text_data",
"property": {
"text": {
"type": "string"
}
}
}
],
"cmd_in": [
{
"name": "flush"
}
],
"cmd_out": [
{
"name": "flush"
}
],
"pcm_frame_out": [
{
"name": "pcm_frame"
}
]
}
}
Loading

0 comments on commit 09ca830

Please sign in to comment.