From e3c604802557e364ea88f6d1f36b5e721ca5e65b Mon Sep 17 00:00:00 2001 From: Chen188 Date: Thu, 8 Aug 2024 04:06:41 +0000 Subject: [PATCH 1/3] add transcribe_asr_python extension --- .../extension/transcribe_asr_python/README.md | 11 + .../transcribe_asr_python/__init__.py | 5 + .../transcribe_asr_python/extension.py | 1 + .../extension/transcribe_asr_python/log.py | 14 ++ .../transcribe_asr_python/manifest.json | 76 ++++++ .../transcribe_asr_python/property.json | 1 + .../transcribe_asr_python/requirements.txt | 1 + .../transcribe_asr_addon.py | 15 ++ .../transcribe_asr_extension.py | 90 +++++++ .../transcribe_config.py | 29 +++ .../transcribe_wrapper.py | 165 ++++++++++++ agents/property.json.example | 234 +++++++++++++++++- server/internal/config.go | 5 + 13 files changed, 645 insertions(+), 2 deletions(-) create mode 100644 agents/addon/extension/transcribe_asr_python/README.md create mode 100644 agents/addon/extension/transcribe_asr_python/__init__.py create mode 100644 agents/addon/extension/transcribe_asr_python/extension.py create mode 100644 agents/addon/extension/transcribe_asr_python/log.py create mode 100644 agents/addon/extension/transcribe_asr_python/manifest.json create mode 100644 agents/addon/extension/transcribe_asr_python/property.json create mode 100644 agents/addon/extension/transcribe_asr_python/requirements.txt create mode 100644 agents/addon/extension/transcribe_asr_python/transcribe_asr_addon.py create mode 100644 agents/addon/extension/transcribe_asr_python/transcribe_asr_extension.py create mode 100644 agents/addon/extension/transcribe_asr_python/transcribe_config.py create mode 100644 agents/addon/extension/transcribe_asr_python/transcribe_wrapper.py diff --git a/agents/addon/extension/transcribe_asr_python/README.md b/agents/addon/extension/transcribe_asr_python/README.md new file mode 100644 index 00000000..8bab60d2 --- /dev/null +++ b/agents/addon/extension/transcribe_asr_python/README.md @@ -0,0 +1,11 @@ +## Amazon Transcribe ASR 
Extension + +### Configurations + +You can configure this extension by providing the following environment variables: + +| Env | Required | Default | Notes | +| -- | -- | -- | -- | +| AWS_REGION | No | us-east-1 | The AWS Region of the Amazon Transcribe service you want to use. | +| AWS_ACCESS_KEY_ID | No | - | Access Key of your IAM User. Make sure it has permission to [start stream transcription](https://docs.aws.amazon.com/transcribe/latest/APIReference/API_streaming_StartStreamTranscription.html). The default credentials provider chain is used if this is not provided; see the [credentials documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). | +| AWS_SECRET_ACCESS_KEY | No | - | Secret Key of your IAM User. The default credentials provider chain is used if this is not provided; see the [credentials documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). | \ No newline at end of file diff --git a/agents/addon/extension/transcribe_asr_python/__init__.py b/agents/addon/extension/transcribe_asr_python/__init__.py new file mode 100644 index 00000000..35742868 --- /dev/null +++ b/agents/addon/extension/transcribe_asr_python/__init__.py @@ -0,0 +1,5 @@ +from . 
import transcribe_asr_addon +from .extension import EXTENSION_NAME +from .log import logger + +logger.info(f"{EXTENSION_NAME} extension loaded") diff --git a/agents/addon/extension/transcribe_asr_python/extension.py b/agents/addon/extension/transcribe_asr_python/extension.py new file mode 100644 index 00000000..0ab3e5ac --- /dev/null +++ b/agents/addon/extension/transcribe_asr_python/extension.py @@ -0,0 +1 @@ +EXTENSION_NAME = "transcribe_asr" \ No newline at end of file diff --git a/agents/addon/extension/transcribe_asr_python/log.py b/agents/addon/extension/transcribe_asr_python/log.py new file mode 100644 index 00000000..032573f4 --- /dev/null +++ b/agents/addon/extension/transcribe_asr_python/log.py @@ -0,0 +1,14 @@ +import logging +from .extension import EXTENSION_NAME + +logger = logging.getLogger(EXTENSION_NAME) +logger.setLevel(logging.INFO) + +formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" +) + +console_handler = logging.StreamHandler() +console_handler.setFormatter(formatter) + +logger.addHandler(console_handler) diff --git a/agents/addon/extension/transcribe_asr_python/manifest.json b/agents/addon/extension/transcribe_asr_python/manifest.json new file mode 100644 index 00000000..d780d2c4 --- /dev/null +++ b/agents/addon/extension/transcribe_asr_python/manifest.json @@ -0,0 +1,76 @@ +{ + "type": "extension", + "name": "transcribe_asr_python", + "version": "0.1.0", + "language": "python", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.4.0" + } + ], + "api": { + "property": { + "region": { + "type": "string" + }, + "access_key": { + "type": "string" + }, + "secret_key": { + "type": "string" + }, + "sample_rate": { + "type": "string" + }, + "lang_code": { + "type": "string" + } + }, + "pcm_frame_in": [ + { + "name": "pcm_frame" + } + ], + "cmd_in": [ + { + "name": "on_user_joined" + }, + { + "name": "on_user_left" + }, + { + 
@register_addon_as_extension(EXTENSION_NAME)
class TranscribeAsrExtensionAddon(Addon):
    """Addon registered under ``EXTENSION_NAME`` that creates the ASR extension."""

    def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None:
        """Create a ``TranscribeAsrExtension`` and hand it back to the runtime.

        Args:
            rte: Runtime environment that receives the created instance.
            addon_name: Name passed to the ``TranscribeAsrExtension`` constructor.
            context: Opaque value passed through to ``on_create_instance_done``.
        """
        logger.info("on_create_instance")
        extension = TranscribeAsrExtension(addon_name)
        rte.on_create_instance_done(extension, context)
def __init__(self, name: str):
    """Create the frame queue and the private event loop for this extension.

    Args:
        name: Extension instance name, forwarded to the base class.
    """
    super().__init__(name)

    self.stopped = False
    self.transcribe = None
    self.thread = None

    # Install the private loop *before* building the queue. On Python < 3.10
    # asyncio.Queue captures the current event loop at construction time, so
    # creating the queue first would bind it to whatever loop happened to be
    # current instead of self.loop.
    self.loop = asyncio.new_event_loop()
    asyncio.set_event_loop(self.loop)

    self.queue = asyncio.Queue(maxsize=3000)  # about 3000 * 10ms = 30s input
class TranscribeConfig:
    """Connection and audio settings for the Transcribe ASR extension.

    Attributes are plain and mutable so individual values can be overridden
    after construction. ``sample_rate`` is always stored as an ``int``, no
    matter whether it is supplied as a number or a numeric string.
    """

    def __init__(self,
                 region: str,
                 access_key: str,
                 secret_key: str,
                 sample_rate: Union[str, int],
                 lang_code: str):
        """Store the given settings and the fixed audio-format values.

        Args:
            region: Region name.
            access_key: Access key; may be empty.
            secret_key: Secret key; may be empty.
            sample_rate: Sample rate as an int or a numeric string.
            lang_code: Language code, e.g. ``'en-US'``.

        Raises:
            ValueError: If ``sample_rate`` cannot be converted to ``int``.
        """
        self.region = region
        self.access_key = access_key
        self.secret_key = secret_key

        self.lang_code = lang_code
        self.sample_rate = sample_rate  # coerced to int by the property setter

        self.media_encoding = 'pcm'
        # Must be the integer 2; a trailing comma here would silently turn
        # the value into the one-element tuple (2,).
        self.bytes_per_sample = 2
        self.channel_nums = 1

    @property
    def sample_rate(self) -> int:
        """Sample rate as an integer."""
        return self._sample_rate

    @sample_rate.setter
    def sample_rate(self, value: Union[str, int]) -> None:
        # Coerce on every assignment (including setattr/__setattr__), not only
        # in __init__, so a string override can never leak through as a str.
        self._sample_rate = int(value)

    @classmethod
    def default_config(cls):
        """Return a config with the default region, sample rate and language."""
        return cls(
            region="us-east-1",
            access_key="",
            secret_key="",
            sample_rate=16000,
            lang_code='en-US'
        )
async def cleanup(self):
    """Close the current stream, if any, and always reset the stream state.

    Ends the input stream and waits for the event-handler task. Failures in
    either step are logged rather than raised, and ``reset_stream()`` runs
    unconditionally, so a broken stream is never kept around and reused.
    """
    try:
        stream_ended = True
        if self.stream:
            try:
                await self.stream.input_stream.end_stream()
                logger.info("cleanup: stream ended.")
            except Exception as e:
                stream_ended = False
                logger.exception(f"cleanup: failed to end stream: {e}")

        handler_task = self.event_handler_task
        if handler_task:
            if not stream_ended:
                # The input side was not closed cleanly, so the handler may
                # never see end-of-stream; cancel it instead of waiting on it.
                handler_task.cancel()
                logger.info("cleanup: event handler cancelled.")
            else:
                try:
                    await handler_task
                    logger.info("cleanup: event handler ended.")
                except Exception as e:
                    logger.exception(f"cleanup: event handler failed: {e}")
    finally:
        # Runs even on cancellation so the next frame lazily opens a new stream.
        self.reset_stream()
class TranscribeEventHandler(TranscriptResultStreamHandler):
    """Result-stream handler that forwards transcript text through ``rte``."""

    def __init__(self, transcript_result_stream: TranscriptResultStream, rte: RteEnv):
        """Keep ``rte`` for sending data and initialise the base handler."""
        super().__init__(transcript_result_stream)
        self.rte = rte

    async def handle_transcript_event(self, transcript_event: TranscriptEvent) -> None:
        """Join the transcripts of one event and send them as a single message.

        The message is flagged final only when none of the event's results
        is partial. Events that yield no text are ignored.
        """
        pieces = []
        is_final = True

        for result in transcript_event.transcript.results:
            if result.is_partial:
                is_final = False
            pieces.extend(alt.transcript for alt in result.alternatives)

        text_result = "".join(pieces)
        if not text_result:
            return

        logger.info(f"got transcript: [{text_result}], is_final: [{is_final}]")

        create_and_send_data(rte=self.rte, text_result=text_result, is_final=is_final)
"default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "baf537f77ebc4187a06a1628a1827f14", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": false, + "enable_agora_asr": false, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "asr", + "addon": "transcribe_asr", + "name": "transcribe_asr", + "property": { + "region": "us-east-1", + "access_key": "", + "secret_key": "", + "sample_rate": "16000", + "lang_code": "en-US" + } + }, + { + "type": "extension", + "extension_group": "bedrock", + "addon": "bedrock_llm_python", + "name": "bedrock_llm", + "property": { + "region": "us-east-1", + "access_key": "", + "secret_key": "", + "model": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "max_tokens": 512, + "prompt": "", + "greeting": "ASTRA agent connected. 
How can i help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "polly_tts", + "name": "polly_tts", + "property": { + "region": "us-east-1", + "access_key": "", + "secret_key": "", + "engine": "generative", + "voice": "Ruth", + "sample_rate": "16000", + "lang_code": "en-US" + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "bedrock" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "asr" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "asr", + "extension": "transcribe_asr" + } + ] + } + ], + "cmd": [ + { + "name": "on_user_joined", + "dest": [ + { + "extension_group": "asr", + "extension": "transcribe_asr" + } + ] + }, + { + "name": "on_user_left", + "dest": [ + { + "extension_group": "asr", + "extension": "transcribe_asr" + } + ] + }, + { + "name": "on_connection_failure", + "dest": [ + { + "extension_group": "asr", + "extension": "transcribe_asr" + } + ] + } + ] + }, + { + "extension_group": "asr", + "extension": "transcribe_asr", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ] + }, + { + "extension_group": "bedrock", + "extension": "bedrock_llm", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "polly_tts" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + 
"extension_group": "tts", + "extension": "polly_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "polly_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ] + } + ] + }, { "name": "va.litellm.azure", "auto_start": true, @@ -1390,4 +1620,4 @@ } ] } -} +} \ No newline at end of file diff --git a/server/internal/config.go b/server/internal/config.go index a0480166..14ed4bd7 100644 --- a/server/internal/config.go +++ b/server/internal/config.go @@ -18,6 +18,7 @@ const ( extensionNameOpenaiChatgpt = "openai_chatgpt" extensionNamePollyTTS = "polly_tts" extensionNameQwenLLM = "qwen_llm" + extensionNameTranscribeAsr = "transcribe_asr" // Language languageChinese = "zh-CN" @@ -44,16 +45,20 @@ var ( "AWS_ACCESS_KEY_ID": { {ExtensionName: extensionNameBedrockLLM, Property: "access_key"}, {ExtensionName: extensionNamePollyTTS, Property: "access_key"}, + {ExtensionName: extensionNameTranscribeAsr, Property: "access_key"}, }, "AWS_SECRET_ACCESS_KEY": { {ExtensionName: extensionNameBedrockLLM, Property: "secret_key"}, {ExtensionName: extensionNamePollyTTS, Property: "secret_key"}, + {ExtensionName: extensionNameTranscribeAsr, Property: "secret_key"}, }, "AWS_BEDROCK_MODEL": { {ExtensionName: extensionNameBedrockLLM, Property: "model"}, }, "AWS_REGION": { {ExtensionName: extensionNameBedrockLLM, Property: "region"}, + {ExtensionName: extensionNamePollyTTS, Property: "region"}, + {ExtensionName: extensionNameTranscribeAsr, Property: "region"}, }, "AZURE_STT_KEY": { {ExtensionName: extensionNameAgoraRTC, Property: "agora_asr_vendor_key"}, From 
49b269cec50a1ec248b2dea6ddb54e8060d848a9 Mon Sep 17 00:00:00 2001 From: Chen188 Date: Thu, 8 Aug 2024 04:08:11 +0000 Subject: [PATCH 2/3] bedrock_llm_extension: add time buffer for model response. --- .../bedrock_llm_python/bedrock_llm_extension.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py b/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py index 78f3e55b..ccc4e2a5 100644 --- a/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py +++ b/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py @@ -249,10 +249,9 @@ def converse_stream_worker(start_time, input_text, memory): first_sentence_sent = False for event in stream: - if start_time < self.outdate_ts: - logger.info( - f"GetConverseStream recv interrupt and flushing for input text: [{input_text}], startTs: {start_time}, outdateTs: {self.outdate_ts}" - ) + # allow 100ms buffer time, in case interruptor's flush cmd comes just after on_data event + if (start_time + 100_000) < self.outdate_ts: + logger.info(f"GetConverseStream recv interrupt and flushing for input text: [{input_text}], startTs: {start_time}, outdateTs: {self.outdate_ts}, delta > 100ms") break if "contentBlockDelta" in event: @@ -278,8 +277,8 @@ def converse_stream_worker(start_time, input_text, memory): sentence, content, sentence_is_final = parse_sentence( sentence, content ) - if len(sentence) == 0 or not sentence_is_final: - logger.info(f"sentence {sentence} is empty or not final") + if not sentence or not sentence_is_final: + logger.info(f"sentence [{sentence}] is empty or not final") break logger.info( f"GetConverseStream recv for input text: [{input_text}] got sentence: [{sentence}]" @@ -313,7 +312,10 @@ def converse_stream_worker(start_time, input_text, memory): if len(full_content.strip()): # remember response as assistant content in memory - memory.append( + if memory and memory[-1]['role'] == 
'assistant': + memory[-1]['content'].append({"text": full_content}) + else: + memory.append( {"role": "assistant", "content": [{"text": full_content}]} ) else: From 4277e3bc019af7e243e9312c58151cc3f0fc7491 Mon Sep 17 00:00:00 2001 From: Chen188 Date: Thu, 8 Aug 2024 04:09:25 +0000 Subject: [PATCH 3/3] polly_tts_extension: remove on_init, fix sample_rate data type. --- agents/addon/extension/polly_tts/manifest.json | 4 ++-- agents/addon/extension/polly_tts/polly_tts_extension.py | 6 ------ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/agents/addon/extension/polly_tts/manifest.json b/agents/addon/extension/polly_tts/manifest.json index 5819f043..2cc19a02 100644 --- a/agents/addon/extension/polly_tts/manifest.json +++ b/agents/addon/extension/polly_tts/manifest.json @@ -28,7 +28,7 @@ "type": "string" }, "sample_rate": { - "type": "int64" + "type": "string" }, "lang_code": { "type": "string" @@ -60,4 +60,4 @@ } ] } -} +} \ No newline at end of file diff --git a/agents/addon/extension/polly_tts/polly_tts_extension.py b/agents/addon/extension/polly_tts/polly_tts_extension.py index 63576ee9..78f95658 100644 --- a/agents/addon/extension/polly_tts/polly_tts_extension.py +++ b/agents/addon/extension/polly_tts/polly_tts_extension.py @@ -41,12 +41,6 @@ def __init__(self, name: str): self.bytes_per_sample = 2 self.number_of_channels = 1 - def on_init( - self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo - ) -> None: - logger.info("PollyTTSExtension on_init") - rte.on_init_done(manifest, property) - def on_start(self, rte: RteEnv) -> None: logger.info("PollyTTSExtension on_start")