From ed75c027cf657dd52c877b750ace86bff4a5fbe4 Mon Sep 17 00:00:00 2001 From: sunshinexcode <24xinhui@163.com> Date: Fri, 25 Oct 2024 21:00:59 +0800 Subject: [PATCH 1/5] feat(): add deepgram_asr_python/cartesia_tts extensions --- .env.example | 10 +- agents/property.json | 270 +++++++++++++++++- .../extension/cartesia_tts/__init__.py | 6 + .../cartesia_tts/cartesia_tts_addon.py | 24 ++ .../cartesia_tts/cartesia_tts_extension.py | 197 +++++++++++++ .../cartesia_tts/cartesia_wrapper.py | 112 ++++++++ .../extension/cartesia_tts/extension.py | 1 + .../extension/cartesia_tts/log.py | 12 + .../extension/cartesia_tts/manifest.json | 56 ++++ .../extension/cartesia_tts/property.json | 1 + .../extension/deepgram_asr_python/__init__.py | 5 + .../deepgram_asr_python/deepgram_asr_addon.py | 14 + .../deepgram_asr_extension.py | 104 +++++++ .../deepgram_asr_python/deepgram_config.py | 26 ++ .../deepgram_asr_python/deepgram_wrapper.py | 129 +++++++++ .../deepgram_asr_python/extension.py | 3 + .../extension/deepgram_asr_python/log.py | 16 ++ .../deepgram_asr_python/manifest.json | 87 ++++++ .../deepgram_asr_python/property.json | 1 + .../deepgram_asr_python/requirements.txt | 1 + 20 files changed, 1073 insertions(+), 2 deletions(-) create mode 100644 agents/ten_packages/extension/cartesia_tts/__init__.py create mode 100644 agents/ten_packages/extension/cartesia_tts/cartesia_tts_addon.py create mode 100644 agents/ten_packages/extension/cartesia_tts/cartesia_tts_extension.py create mode 100644 agents/ten_packages/extension/cartesia_tts/cartesia_wrapper.py create mode 100644 agents/ten_packages/extension/cartesia_tts/extension.py create mode 100644 agents/ten_packages/extension/cartesia_tts/log.py create mode 100644 agents/ten_packages/extension/cartesia_tts/manifest.json create mode 100644 agents/ten_packages/extension/cartesia_tts/property.json create mode 100644 agents/ten_packages/extension/deepgram_asr_python/__init__.py create mode 100644 agents/ten_packages/extension/deepgram_asr_python/deepgram_asr_addon.py create mode 100644 agents/ten_packages/extension/deepgram_asr_python/deepgram_asr_extension.py create mode 100644 agents/ten_packages/extension/deepgram_asr_python/deepgram_config.py create mode 100644 agents/ten_packages/extension/deepgram_asr_python/deepgram_wrapper.py create mode 100644 agents/ten_packages/extension/deepgram_asr_python/extension.py create mode 100644 agents/ten_packages/extension/deepgram_asr_python/log.py create mode 100644 agents/ten_packages/extension/deepgram_asr_python/manifest.json create mode 100644 agents/ten_packages/extension/deepgram_asr_python/property.json create mode 100644 agents/ten_packages/extension/deepgram_asr_python/requirements.txt diff --git a/.env.example b/.env.example index 4713a75f..d35c8332 100644 --- a/.env.example +++ b/.env.example @@ -18,7 +18,7 @@ WORKERS_MAX=100 # Worker quit timeout in seconds WORKER_QUIT_TIMEOUT_SECONDES=60 -# Agora App ID +# Agora App ID # Agora App Certificate(only required if enabled in the Agora Console) AGORA_APP_ID= AGORA_APP_CERTIFICATE= @@ -55,10 +55,18 @@ AZURE_STT_REGION= AZURE_TTS_KEY= AZURE_TTS_REGION= +# Extension: cartesia_tts +# Cartesia TTS key +CARTESIA_API_KEY= + # Extension: cosy_tts # Cosy TTS key COSY_TTS_KEY= +# Extension: deepgram_asr_python +# Deepgram ASR key +DEEPGRAM_API_KEY= + # Extension: elevenlabs_tts # ElevenLabs TTS key ELEVENLABS_TTS_KEY= diff --git a/agents/property.json b/agents/property.json index 5b6f46a9..df96fb8f 100644 --- a/agents/property.json +++ b/agents/property.json @@ -3153,7 +3153,275 @@ ] } ] + }, + { + "name": "va.deepgram.openai.cartesia", + "auto_start": false, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "${env:AGORA_APP_ID}", + "token": "", + "channel": "ten_agent_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": false, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "${env:AZURE_STT_KEY}", + "agora_asr_vendor_region": "${env:AZURE_STT_REGION}", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "asr", + "addon": "deepgram_asr_python", + "name": "deepgram_asr", + "property": { + "api_key": "${env:DEEPGRAM_API_KEY}", + "language": "en-US", + "model": "nova-2", + "sample_rate": "16000" + } + }, + { + "type": "extension", + "extension_group": "chatgpt", + "addon": "openai_chatgpt", + "name": "openai_chatgpt", + "property": { + "base_url": "", + "api_key": "${env:OPENAI_API_KEY}", + "frequency_penalty": 0.9, + "model": "gpt-4o-mini", + "max_tokens": 512, + "prompt": "", + "proxy_url": "${env:OPENAI_PROXY_URL}", + "greeting": "TEN Agent connected. How can I help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "cartesia_tts", + "name": "cartesia_tts", + "property": { + "api_key": "${env:CARTESIA_API_KEY}", + "cartesia_version": "2024-06-10", + "model_id": "sonic-english", + "voice_id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94", + "sample_rate": "16000" + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension", + "extension_group": "transcriber", + "addon": "message_collector", + "name": "message_collector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "chatgpt" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "asr" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "audio_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "asr", + "extension": "deepgram_asr" + }, + { + "extension_group": "transcriber", + "extension": "message_collector" + } + ] + } + ], + "cmd": [ + { + "name": "on_user_joined", + "dest": [ + { + "extension_group": "asr", + "extension": "deepgram_asr" + } + ] + }, + { + "name": "on_user_left", + "dest": [ + { + "extension_group": "asr", + "extension": "deepgram_asr" + } + ] + }, + { + "name": "on_connection_failure", + "dest": [ + { + "extension_group": "asr", + "extension": "deepgram_asr" + } + ] + } + ] + }, + { + "extension_group": "asr", + "extension": "deepgram_asr", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "transcriber", + "extension": "message_collector" + } + ] + } + ] + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "cartesia_tts" + }, + { + "extension_group": "transcriber", + "extension": "message_collector" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "cartesia_tts" + } + ] + } + ] + }, + { + "extension_group": "transcriber", + "extension": "message_collector", + "data": [ + { + "name": "data", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "cartesia_tts", + "audio_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + } + ] } ] } -} \ No newline at end of file +} diff --git a/agents/ten_packages/extension/cartesia_tts/__init__.py b/agents/ten_packages/extension/cartesia_tts/__init__.py new file mode 100644 index 00000000..f6bb8f4c --- /dev/null +++ b/agents/ten_packages/extension/cartesia_tts/__init__.py @@ -0,0 +1,6 @@ +from . import cartesia_tts_addon +from .extension import EXTENSION_NAME +from .log import logger + + +logger.info(f"{EXTENSION_NAME} extension loaded") diff --git a/agents/ten_packages/extension/cartesia_tts/cartesia_tts_addon.py b/agents/ten_packages/extension/cartesia_tts/cartesia_tts_addon.py new file mode 100644 index 00000000..f633d7e3 --- /dev/null +++ b/agents/ten_packages/extension/cartesia_tts/cartesia_tts_addon.py @@ -0,0 +1,24 @@ +# +# +# Agora Real Time Engagement +# Created by XinHui Li in 2024-07. +# Copyright (c) 2024 Agora IO. All rights reserved. +# +# + +from ten import ( + Addon, + register_addon_as_extension, + TenEnv, +) +from .extension import EXTENSION_NAME +from .log import logger + + +@register_addon_as_extension(EXTENSION_NAME) +class CartesiaTTSExtensionAddon(Addon): + def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None: + logger.info("on_create_instance") + from .cartesia_tts_extension import CartesiaTTSExtension + + ten.on_create_instance_done(CartesiaTTSExtension(addon_name), context) diff --git a/agents/ten_packages/extension/cartesia_tts/cartesia_tts_extension.py b/agents/ten_packages/extension/cartesia_tts/cartesia_tts_extension.py new file mode 100644 index 00000000..f18c9af3 --- /dev/null +++ b/agents/ten_packages/extension/cartesia_tts/cartesia_tts_extension.py @@ -0,0 +1,197 @@ +# cartesia_tts_extension.py + +import queue +import threading +from datetime import datetime +import asyncio +import re +from ten import ( + Extension, + TenEnv, + Cmd, + AudioFrameDataFmt, + AudioFrame, + Data, + StatusCode, + CmdResult, +) +from .cartesia_wrapper import CartesiaWrapper, CartesiaConfig, CartesiaError +from .log import logger + +class CartesiaCallback: + # Handles audio processing and interrupt checks + def __init__(self, ten: TenEnv, sample_rate: int, need_interrupt_callback): + self.ten = ten + self.sample_rate = sample_rate + self.need_interrupt_callback = need_interrupt_callback + self.ts = datetime.now() + + def set_input_ts(self, ts: datetime): + # Updates timestamp for the current input + self.ts = ts + + def need_interrupt(self) -> bool: + # Checks if current task should be interrupted + return self.need_interrupt_callback(self.ts) + + def create_audio_frame(self, audio_data): + # Creates an AudioFrame from raw audio data + frame = AudioFrame.create("pcm_frame") + frame.set_sample_rate(self.sample_rate) + frame.set_bytes_per_sample(2) # s16le is 2 bytes per sample + frame.set_number_of_channels(1) + frame.set_data_fmt(AudioFrameDataFmt.INTERLEAVE) + frame.set_samples_per_channel(len(audio_data) // 2) + frame.alloc_buf(len(audio_data)) + buff = frame.lock_buf() + buff[:] = audio_data + frame.unlock_buf(buff) + return frame + + def process_audio(self, audio_data): + # Processes audio data if not interrupted + if self.need_interrupt(): + return + audio_frame = self.create_audio_frame(audio_data) + self.ten.send_audio_frame(audio_frame) + +class CartesiaTTSExtension(Extension): + def __init__(self, name: str): + super().__init__(name) + self.cartesia = None + self.loop = None + self.queue = queue.Queue() + self.outdate_ts = datetime.now() + self.stopped = False + self.thread = None + self.callback = None + self.skip_patterns = [r'\bssml_\w+\b'] # List of patterns to skip + self.ten = None + + def on_start(self, ten: TenEnv) -> None: + self.ten = ten + try: + # Initialize Cartesia config and wrapper + cartesia_config = CartesiaConfig( + api_key=ten.get_property_string("api_key"), + model_id=ten.get_property_string("model_id"), + voice_id=ten.get_property_string("voice_id"), + sample_rate=int(ten.get_property_string("sample_rate")), + cartesia_version=ten.get_property_string("cartesia_version") + ) + self.cartesia = CartesiaWrapper(cartesia_config) + self.callback = CartesiaCallback(ten, cartesia_config.sample_rate, self.need_interrupt) + + # Set up asyncio event loop + self.loop = asyncio.new_event_loop() + asyncio.set_event_loop(self.loop) + + # Connect to Cartesia API + self.loop.run_until_complete(self.cartesia.connect()) + logger.info("Successfully connected to Cartesia API") + + # Start async handling thread + self.thread = threading.Thread(target=self.async_handle, args=[ten]) + self.thread.start() + + ten.on_start_done() + except Exception as e: + logger.error(f"Failed to start CartesiaTTSExtension: {e}") + ten.on_start_done() + + def on_stop(self, ten: TenEnv) -> None: + # Clean up resources and stop thread + self.stopped = True + self.flush() + self.queue.put(None) + if self.thread is not None: + self.thread.join() + self.thread = None + + if self.cartesia: + self.loop.run_until_complete(self.cartesia.close()) + if self.loop: + self.loop.close() + ten.on_stop_done() + + def need_interrupt(self, ts: datetime) -> bool: + # Check if task is outdated + return self.outdate_ts > ts + + def process_input_text(self, input_text: str) -> str: + # Process input text to remove parts that should be skipped + for pattern in self.skip_patterns: + input_text = re.sub(pattern, '', input_text, flags=re.IGNORECASE) + return input_text.strip() + + def create_pause_text(self, duration_ms: int) -> str: + # Create pause text + return f"PAUSE_{duration_ms}_MS" + + def on_data(self, ten: TenEnv, data: Data) -> None: + # Queue incoming text for processing + input_text = data.get_property_string("text") + if not input_text: + return + + # Handle the case of just a period or comma + if input_text.strip() in ['.', ',']: + pause_duration = 150 if input_text.strip() == '.' else 150 + pause_text = self.create_pause_text(pause_duration) + self.queue.put(("PAUSE", pause_text, datetime.now())) + return + + processed_text = self.process_input_text(input_text) + + if processed_text.strip(): + self.queue.put(("TEXT", processed_text, datetime.now())) + else: + logger.info("Processed text is empty. Skipping synthesis.") + + def async_handle(self, ten: TenEnv): + # Process queue items asynchronously + while not self.stopped: + try: + value = self.queue.get() + if value is None: + break + + item_type, content, ts = value + + self.callback.set_input_ts(ts) + + if self.callback.need_interrupt(): + logger.info("Drop outdated input") + continue + + try: + audio_data = self.loop.run_until_complete(self.cartesia.synthesize(content)) + self.callback.process_audio(audio_data) + except CartesiaError as e: + logger.error(f"Failed to synthesize: {str(e)}. Moving to next item.") + # Optionally, you could add some fallback behavior here, like playing an error sound + + except Exception as e: + logger.exception(f"Error in async_handle: {e}") + # Continue processing the next item instead of breaking the loop + + def on_cmd(self, ten: TenEnv, cmd: Cmd) -> None: + # Handle incoming commands + cmd_name = cmd.get_name() + + if cmd_name == "flush": + self.outdate_ts = datetime.now() + self.flush() + cmd_result = CmdResult.create(StatusCode.OK) + cmd_result.set_property_string("detail", "Flush command executed") + else: + logger.warning(f"Unknown command received: {cmd_name}") + cmd_result = CmdResult.create(StatusCode.ERROR) + cmd_result.set_property_string("detail", f"Unknown command: {cmd_name}") + + ten.return_result(cmd_result, cmd) + + def flush(self): + # Clear the queue + while not self.queue.empty(): + self.queue.get() diff --git a/agents/ten_packages/extension/cartesia_tts/cartesia_wrapper.py b/agents/ten_packages/extension/cartesia_tts/cartesia_wrapper.py new file mode 100644 index 00000000..37a3d7c4 --- /dev/null +++ b/agents/ten_packages/extension/cartesia_tts/cartesia_wrapper.py @@ -0,0 +1,112 @@ +# cartesia_wrapper.py + +import asyncio +import websockets +import json +import base64 +import logging +from urllib.parse import urlparse + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class CartesiaError(Exception): + """Custom exception class for Cartesia-related errors.""" + pass + +class CartesiaConfig: + # Configuration class for Cartesia API + def __init__(self, api_key, model_id, voice_id, sample_rate, cartesia_version): + self.api_key = api_key + self.model_id = model_id + self.voice_id = voice_id + self.sample_rate = sample_rate + self.cartesia_version = cartesia_version + +class CartesiaWrapper: + # Wrapper class for Cartesia API interactions + def __init__(self, config: CartesiaConfig): + self.config = config + self.websocket = None + self.context_id = 0 + + async def connect(self): + # Establish WebSocket connection to Cartesia API + ws_url = f"wss://api.cartesia.ai/tts/websocket?api_key={self.config.api_key}&cartesia_version={self.config.cartesia_version}" + try: + self.websocket = await websockets.connect(ws_url) + logger.info("Connected to Cartesia WebSocket") + except Exception as e: + logger.error(f"Failed to connect to Cartesia API: {str(e)}") + raise CartesiaError(f"Connection failed: {str(e)}") + + async def synthesize(self, text: str): + # Synthesize speech from text using Cartesia API + if not self.websocket: + await self.connect() + + if text.startswith("PAUSE_"): + # Handle custom pause marker + try: + duration_ms = int(text.split("_")[1]) + return self.generate_silence(duration_ms) + except (IndexError, ValueError): + logger.error(f"Invalid pause format: {text}") + raise CartesiaError(f"Invalid pause format: {text}") + + self.context_id += 1 + request = { + "context_id": f"context_{self.context_id}", + "model_id": self.config.model_id, + "transcript": text, + "voice": {"mode": "id", "id": self.config.voice_id}, + "output_format": { + "container": "raw", + "encoding": "pcm_s16le", + "sample_rate": int(self.config.sample_rate) + }, + "language": "en", + "add_timestamps": False + } + + try: + # Send synthesis request + await self.websocket.send(json.dumps(request)) + + # Receive and process audio chunks + audio_data = bytearray() + while True: + response = await self.websocket.recv() + message = json.loads(response) + + if message['type'] == 'chunk': + chunk_data = base64.b64decode(message['data']) + audio_data.extend(chunk_data) + elif message['type'] == 'done': + break + elif message['type'] == 'error': + raise CartesiaError(f"Synthesis error: {message.get('error', 'Unknown error')}") + else: + logger.warning(f"Unknown message type: {message['type']}") + + return audio_data + except websockets.exceptions.ConnectionClosed: + # Handle connection errors and retry + logger.error("WebSocket connection closed unexpectedly. Attempting to reconnect...") + await self.connect() + return await self.synthesize(text) # Retry the synthesis after reconnecting + except Exception as e: + logger.error(f"Error during synthesis: {str(e)}") + raise CartesiaError(f"Synthesis failed: {str(e)}") + + def generate_silence(self, duration_ms: int) -> bytes: + # Generate silent audio data + sample_rate = self.config.sample_rate + num_samples = int(sample_rate * duration_ms / 1000) + return b"\x00" * (num_samples * 2) # Assuming 16-bit audio + + async def close(self): + # Close WebSocket connection + if self.websocket: + await self.websocket.close() + logger.info("Closed WebSocket connection to Cartesia API") diff --git a/agents/ten_packages/extension/cartesia_tts/extension.py b/agents/ten_packages/extension/cartesia_tts/extension.py new file mode 100644 index 00000000..4883c11c --- /dev/null +++ b/agents/ten_packages/extension/cartesia_tts/extension.py @@ -0,0 +1 @@ +EXTENSION_NAME = "cartesia_tts" diff --git a/agents/ten_packages/extension/cartesia_tts/log.py b/agents/ten_packages/extension/cartesia_tts/log.py new file mode 100644 index 00000000..fad21710 --- /dev/null +++ b/agents/ten_packages/extension/cartesia_tts/log.py @@ -0,0 +1,12 @@ +import logging +from .extension import EXTENSION_NAME + +logger = logging.getLogger(EXTENSION_NAME) +logger.setLevel(logging.INFO) + +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s") + +console_handler = logging.StreamHandler() +console_handler.setFormatter(formatter) + +logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/cartesia_tts/manifest.json b/agents/ten_packages/extension/cartesia_tts/manifest.json new file mode 100644 index 00000000..41797bb5 --- /dev/null +++ b/agents/ten_packages/extension/cartesia_tts/manifest.json @@ -0,0 +1,56 @@ +{ + "type": "extension", + "name": "cartesia_tts", + "version": "0.1.0", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_python", + "version": "0.2" + } + ], + "api": { + "property": { + "api_key": { + "type": "string" + }, + "cartesia_version": { + "type": "string" + }, + "model_id": { + "type": "string" + }, + "sample_rate": { + "type": "string" + }, + "voice_id": { + "type": "string" + } + }, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + } + } + } + ], + "cmd_in": [ + { + "name": "flush" + } + ], + "cmd_out": [ + { + "name": "flush" + } + ], + "audio_frame_out": [ + { + "name": "pcm_frame" + } + ] + } +} diff --git a/agents/ten_packages/extension/cartesia_tts/property.json b/agents/ten_packages/extension/cartesia_tts/property.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/agents/ten_packages/extension/cartesia_tts/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/ten_packages/extension/deepgram_asr_python/__init__.py b/agents/ten_packages/extension/deepgram_asr_python/__init__.py new file mode 100644 index 00000000..71578b73 --- /dev/null +++ b/agents/ten_packages/extension/deepgram_asr_python/__init__.py @@ -0,0 +1,5 @@ +from . import deepgram_asr_addon +from .extension import EXTENSION_NAME +from .log import logger + +logger.info(f"{EXTENSION_NAME} extension loaded") diff --git a/agents/ten_packages/extension/deepgram_asr_python/deepgram_asr_addon.py b/agents/ten_packages/extension/deepgram_asr_python/deepgram_asr_addon.py new file mode 100644 index 00000000..8551dd87 --- /dev/null +++ b/agents/ten_packages/extension/deepgram_asr_python/deepgram_asr_addon.py @@ -0,0 +1,14 @@ +from ten import ( + Addon, + register_addon_as_extension, + TenEnv, +) +from .extension import EXTENSION_NAME +from .log import logger +from .deepgram_asr_extension import DeepgramASRExtension + +@register_addon_as_extension(EXTENSION_NAME) +class DeepgramASRExtensionAddon(Addon): + def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None: + logger.info("on_create_instance") + ten.on_create_instance_done(DeepgramASRExtension(addon_name), context) diff --git a/agents/ten_packages/extension/deepgram_asr_python/deepgram_asr_extension.py b/agents/ten_packages/extension/deepgram_asr_python/deepgram_asr_extension.py new file mode 100644 index 00000000..fbaccbdc --- /dev/null +++ b/agents/ten_packages/extension/deepgram_asr_python/deepgram_asr_extension.py @@ -0,0 +1,104 @@ +from ten import ( + Extension, + TenEnv, + Cmd, + AudioFrame, + StatusCode, + CmdResult, +) + +import asyncio +import threading + +from .log import logger +from .deepgram_wrapper import AsyncDeepgramWrapper, DeepgramConfig + +PROPERTY_API_KEY = "api_key" # Required +PROPERTY_LANG = "language" # Optional +PROPERTY_MODEL = "model" # Optional +PROPERTY_SAMPLE_RATE = "sample_rate" # Optional + + +class DeepgramASRExtension(Extension): + def __init__(self, name: str): + super().__init__(name) + + self.stopped = False + self.queue = asyncio.Queue(maxsize=3000) # about 3000 * 10ms = 30s input + self.deepgram = None + self.thread = None + + self.loop = asyncio.new_event_loop() + asyncio.set_event_loop(self.loop) + + def on_start(self, ten: TenEnv) -> None: + logger.info("on_start") + + deepgram_config = DeepgramConfig.default_config() + + try: + deepgram_config.api_key = ten.get_property_string(PROPERTY_API_KEY).strip() + except Exception as e: + logger.error(f"get property {PROPERTY_API_KEY} error: {e}") + return + + for optional_param in [ + PROPERTY_LANG, + PROPERTY_MODEL, + PROPERTY_SAMPLE_RATE, + ]: + try: + value = ten.get_property_string(optional_param).strip() + if value: + deepgram_config.__setattr__(optional_param, value) + except Exception as err: + logger.debug( + f"get property optional {optional_param} failed, err: {err}. Using default value: {deepgram_config.__getattribute__(optional_param)}" + ) + + self.deepgram = AsyncDeepgramWrapper( + deepgram_config, self.queue, ten, self.loop + ) + + logger.info("starting async_deepgram_wrapper thread") + self.thread = threading.Thread(target=self.deepgram.run, args=[]) + self.thread.start() + + ten.on_start_done() + + def put_pcm_frame(self, pcm_frame: AudioFrame) -> None: + try: + asyncio.run_coroutine_threadsafe( + self.queue.put(pcm_frame), self.loop + ).result(timeout=0.5) + except asyncio.QueueFull: + logger.exception("queue is full, dropping frame") + except Exception as e: + logger.exception(f"error putting frame in queue: {e}") + + def on_audio_frame(self, ten: TenEnv, frame: AudioFrame) -> None: + self.put_pcm_frame(pcm_frame=frame) + + def on_stop(self, ten: TenEnv) -> None: + logger.info("on_stop") + + # put an empty frame to stop deepgram_wrapper + self.put_pcm_frame(None) + self.stopped = True + self.thread.join() + self.loop.stop() + self.loop.close() + + ten.on_stop_done() + + def on_cmd(self, ten: TenEnv, cmd: Cmd) -> None: + logger.info("on_cmd") + cmd_json = cmd.to_json() + logger.info("on_cmd json: " + cmd_json) + + cmdName = cmd.get_name() + logger.info("got cmd %s" % cmdName) + + cmd_result = CmdResult.create(StatusCode.OK) + cmd_result.set_property_string("detail", "success") + ten.return_result(cmd_result, cmd) diff --git a/agents/ten_packages/extension/deepgram_asr_python/deepgram_config.py b/agents/ten_packages/extension/deepgram_asr_python/deepgram_config.py new file mode 100644 index 00000000..6fa5f16f --- /dev/null +++ b/agents/ten_packages/extension/deepgram_asr_python/deepgram_config.py @@ -0,0 +1,26 @@ +from typing import Union + +class DeepgramConfig: + def __init__(self, + api_key: str, + language: str, + model: str, + sample_rate: Union[str, int]): + self.api_key = api_key + self.language = language + self.model = model + self.sample_rate = int(sample_rate) + + self.channels = 1 + self.encoding = 'linear16' + self.interim_results = True + self.punctuate = True + + @classmethod + def default_config(cls): + return cls( + api_key="", + language="en-US", + model="nova-2", + sample_rate=16000 + ) \ No newline at end of file diff --git a/agents/ten_packages/extension/deepgram_asr_python/deepgram_wrapper.py b/agents/ten_packages/extension/deepgram_asr_python/deepgram_wrapper.py new file mode 100644 index 00000000..66df70d8 --- /dev/null +++ b/agents/ten_packages/extension/deepgram_asr_python/deepgram_wrapper.py @@ -0,0 +1,129 @@ +import asyncio + +from ten import ( + TenEnv, + Data +) + +from deepgram import ListenWebSocketClient, DeepgramClientOptions, LiveTranscriptionEvents, LiveOptions + +from .log import logger +from .deepgram_config import DeepgramConfig + +DATA_OUT_TEXT_DATA_PROPERTY_TEXT = "text" +DATA_OUT_TEXT_DATA_PROPERTY_IS_FINAL = "is_final" +DATA_OUT_TEXT_DATA_PROPERTY_STREAM_ID = "stream_id" +DATA_OUT_TEXT_DATA_PROPERTY_END_OF_SEGMENT = "end_of_segment" + +def create_and_send_data(ten: TenEnv, text_result: str, is_final: bool, stream_id: int): + stable_data = Data.create("text_data") + stable_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_IS_FINAL, is_final) + stable_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, text_result) + stable_data.set_property_int(DATA_OUT_TEXT_DATA_PROPERTY_STREAM_ID, stream_id) + stable_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_END_OF_SEGMENT, is_final) + ten.send_data(stable_data) + + +class AsyncDeepgramWrapper(): + def __init__(self, config: DeepgramConfig, queue: asyncio.Queue, ten:TenEnv, loop: asyncio.BaseEventLoop): + self.queue = queue + self.ten = ten + self.stopped = False + self.config = config + self.loop = loop + self.stream_id = 0 + + logger.info(f"init deepgram client with api key: {config.api_key[:5]}") + self.deepgram_client = ListenWebSocketClient(config=DeepgramClientOptions( + api_key=config.api_key, + )) + + self.start(ten) + asyncio.set_event_loop(self.loop) + + def start(self, ten:TenEnv) -> None: + logger.info(f"start deepgram") + + super = self + + def on_open(self, open, **kwargs): + logger.info(f"deepgram on_open: {open}") + + def on_close(self, close, **kwargs): + logger.info(f"deepgram on_close: {close}") + + def on_message(self, result, **kwargs): + sentence = result.channel.alternatives[0].transcript + + if len(sentence) == 0: + return + + is_final = result.is_final + logger.info(f"deepgram got sentence: [{sentence}], is_final: {is_final}, stream_id: {super.stream_id}") + + create_and_send_data(ten=ten, text_result=sentence, is_final=is_final, stream_id=super.stream_id) + + def on_error(self, error, **kwargs): + logger.error(f"deepgram on_error: {error}") + + self.deepgram_client.on(LiveTranscriptionEvents.Open, on_open) + self.deepgram_client.on(LiveTranscriptionEvents.Close, on_close) + self.deepgram_client.on(LiveTranscriptionEvents.Transcript, on_message) + self.deepgram_client.on(LiveTranscriptionEvents.Error, on_error) + + options = LiveOptions(language=self.config.language, + model=self.config.model, + sample_rate=self.config.sample_rate, + channels=self.config.channels, + encoding=self.config.encoding, + interim_results=self.config.interim_results, + punctuate=self.config.punctuate) + # connect to websocket + if self.deepgram_client.start(options) is False: + logger.error(f"failed to connect to Deepgram") + return + + logger.info(f"successfully connected to Deepgram") + + async def send_frame(self) -> None: + while not self.stopped: + try: + pcm_frame = await asyncio.wait_for(self.queue.get(), timeout=10.0) + + if pcm_frame is None: + logger.warning("send_frame: exit due to None value got.") + return + + frame_buf = pcm_frame.get_buf() + if not frame_buf: + logger.warning("send_frame: empty pcm_frame detected.") + continue + + self.stream_id = pcm_frame.get_property_int('stream_id') + self.deepgram_client.send(frame_buf) + self.queue.task_done() + except asyncio.TimeoutError: + logger.exception(f"error in send_frame: {e}") + except IOError as e: + logger.exception(f"error in send_frame: {e}") + except Exception as e: + logger.exception(f"error in send_frame: {e}") + raise e + + logger.info("send_frame: exit due to self.stopped == True") + + async def deepgram_loop(self) -> None: + try: + await self.send_frame() + except Exception as e: + logger.exception(e) + finally: + await self.cleanup() + + def run(self) -> None: + self.loop.run_until_complete(self.deepgram_loop()) + self.loop.close() + logger.info("async_deepgram_wrapper: thread completed.") + + def stop(self) -> None: + self.stopped = True diff --git a/agents/ten_packages/extension/deepgram_asr_python/extension.py b/agents/ten_packages/extension/deepgram_asr_python/extension.py new file mode 100644 index 00000000..43c52445 --- /dev/null +++ b/agents/ten_packages/extension/deepgram_asr_python/extension.py @@ -0,0 +1,3 @@ +# extension.py + +EXTENSION_NAME = "deepgram_asr_python" diff --git a/agents/ten_packages/extension/deepgram_asr_python/log.py b/agents/ten_packages/extension/deepgram_asr_python/log.py new file mode 100644 index 00000000..88a2cb1c --- /dev/null +++ b/agents/ten_packages/extension/deepgram_asr_python/log.py @@ -0,0 +1,16 @@ +# log.py + +import logging +from .extension import EXTENSION_NAME + +logger = logging.getLogger(EXTENSION_NAME) +logger.setLevel(logging.INFO) + +formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" +) + +console_handler = logging.StreamHandler() +console_handler.setFormatter(formatter) + +logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/deepgram_asr_python/manifest.json b/agents/ten_packages/extension/deepgram_asr_python/manifest.json new file mode 100644 index 00000000..6d04eeef --- /dev/null +++ b/agents/ten_packages/extension/deepgram_asr_python/manifest.json @@ -0,0 +1,87 @@ +{ + "type": "extension", + "name": "deepgram_asr_python", + "version": "0.1.0", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_python", + "version": "0.2" + } + ], + "api": { + "property": { + "api_key": { + "type": "string" + }, + "model": { + "type": "string" + }, + "language": { + "type": "string" + }, + "sample_rate": { + "type": "string" + } + }, + "audio_frame_in": [ + { + "name": "pcm_frame" + } + ], + "cmd_in": [ + { + "name": "on_user_joined", + "property": { + "user_id": { + "type": "string" + } + } + }, + { + "name": "on_user_left", + "property": { + "user_id": { + "type": "string" + } + } + }, + { + "name": "on_connection_failure", + "property": { + "error": { + "type": "string" + } + } + } + ], + "data_out": [ + { + "name": "text_data", + "property": { + "time": { + "type": "int64" + }, + "duration_ms": { + "type": "int64" + }, + "language": { + "type": "string" + }, + "text": { + "type": "string" + }, + "is_final": { + "type": "bool" + }, + "stream_id": { + "type": "uint32" + }, + "end_of_segment": { + "type": "bool" + } + } + } + ] + } +} diff --git a/agents/ten_packages/extension/deepgram_asr_python/property.json b/agents/ten_packages/extension/deepgram_asr_python/property.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/agents/ten_packages/extension/deepgram_asr_python/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/ten_packages/extension/deepgram_asr_python/requirements.txt b/agents/ten_packages/extension/deepgram_asr_python/requirements.txt new file mode 100644 index 00000000..8c9fa1e8 --- /dev/null +++ b/agents/ten_packages/extension/deepgram_asr_python/requirements.txt @@ -0,0 +1 @@ +deepgram-sdk==3.7.5 \ No newline at end of file From 095ae75dfcf5e130302cb176a99a82650b400d48 Mon Sep 17 00:00:00 2001 From: sunshinexcode <24xinhui@163.com> Date: Fri, 25 Oct 2024 22:10:58 +0800 Subject: [PATCH 2/5] chore(): modify to use async and keep the connection --- .../deepgram_asr_python/deepgram_wrapper.py | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/agents/ten_packages/extension/deepgram_asr_python/deepgram_wrapper.py b/agents/ten_packages/extension/deepgram_asr_python/deepgram_wrapper.py index 66df70d8..fd74ae5d 100644 --- a/agents/ten_packages/extension/deepgram_asr_python/deepgram_wrapper.py +++ b/agents/ten_packages/extension/deepgram_asr_python/deepgram_wrapper.py @@ -5,7 +5,7 @@ Data ) -from deepgram import ListenWebSocketClient, DeepgramClientOptions, LiveTranscriptionEvents, LiveOptions +from deepgram import AsyncListenWebSocketClient, DeepgramClientOptions, LiveTranscriptionEvents, LiveOptions from .log import logger from .deepgram_config import DeepgramConfig @@ -34,25 +34,26 @@ def __init__(self, config: DeepgramConfig, queue: asyncio.Queue, ten:TenEnv, loo self.stream_id = 0 logger.info(f"init deepgram client with api key: {config.api_key[:5]}") - self.deepgram_client = ListenWebSocketClient(config=DeepgramClientOptions( + self.deepgram_client = AsyncListenWebSocketClient(config=DeepgramClientOptions( api_key=config.api_key, + options={"keepalive": "true"} )) - self.start(ten) asyncio.set_event_loop(self.loop) + self.loop.create_task(self.start_listen(ten)) - def start(self, ten:TenEnv) -> None: - logger.info(f"start deepgram") + async def start_listen(self, ten:TenEnv) -> None: + logger.info(f"start and listen deepgram") super = self - def on_open(self, open, **kwargs): - logger.info(f"deepgram on_open: {open}") + async def on_open(self, open, **kwargs): + logger.info(f"deepgram event callback on_open: {open}") - def on_close(self, close, **kwargs): - logger.info(f"deepgram on_close: {close}") + async def on_close(self, close, **kwargs): + logger.info(f"deepgram event callback on_close: {close}") - def on_message(self, result, **kwargs): + async def on_message(self, result, **kwargs): sentence = result.channel.alternatives[0].transcript if len(sentence) == 0: @@ -63,8 +64,8 @@ def on_message(self, result, **kwargs): create_and_send_data(ten=ten, text_result=sentence, is_final=is_final, stream_id=super.stream_id) - def on_error(self, error, **kwargs): - logger.error(f"deepgram on_error: {error}") + async def on_error(self, error, **kwargs): + logger.error(f"deepgram event callback on_error: {error}") self.deepgram_client.on(LiveTranscriptionEvents.Open, on_open) self.deepgram_client.on(LiveTranscriptionEvents.Close, on_close) @@ -79,11 +80,11 @@ def on_error(self, error, **kwargs): interim_results=self.config.interim_results, punctuate=self.config.punctuate) # connect to websocket - if self.deepgram_client.start(options) is False: - logger.error(f"failed to connect to Deepgram") + if await self.deepgram_client.start(options) is False: + logger.error(f"failed to connect to deepgram") return - logger.info(f"successfully connected to Deepgram") + logger.info(f"successfully connected to deepgram") async def send_frame(self) -> None: while not self.stopped: @@ -100,9 +101,9 @@ async def send_frame(self) -> None: continue self.stream_id = pcm_frame.get_property_int('stream_id') - self.deepgram_client.send(frame_buf) + await self.deepgram_client.send(frame_buf) self.queue.task_done() - except asyncio.TimeoutError: + except asyncio.TimeoutError as e: logger.exception(f"error in send_frame: {e}") except IOError as e: logger.exception(f"error in send_frame: {e}") @@ -117,8 +118,6 @@ async def deepgram_loop(self) -> None: await self.send_frame() except Exception as e: logger.exception(e) - finally: - await self.cleanup() def run(self) -> None: self.loop.run_until_complete(self.deepgram_loop()) From 910bcb6cf3d61998ffa56bee750b215f8e52bd0a Mon Sep 17 00:00:00 2001 From: sunshinexcode <24xinhui@163.com> Date: Mon, 28 Oct 2024 10:55:31 +0800 Subject: [PATCH 3/5] chore(): upgrade ten_runtime_python version to 0.3 --- agents/ten_packages/extension/cartesia_tts/manifest.json | 2 +- agents/ten_packages/extension/deepgram_asr_python/manifest.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/agents/ten_packages/extension/cartesia_tts/manifest.json b/agents/ten_packages/extension/cartesia_tts/manifest.json index 41797bb5..24269a64 100644 --- a/agents/ten_packages/extension/cartesia_tts/manifest.json +++ b/agents/ten_packages/extension/cartesia_tts/manifest.json @@ -6,7 +6,7 @@ { "type": "system", "name": "ten_runtime_python", - "version": "0.2" + "version": "0.3" } ], "api": { diff --git a/agents/ten_packages/extension/deepgram_asr_python/manifest.json b/agents/ten_packages/extension/deepgram_asr_python/manifest.json index 6d04eeef..5875da2d 100644 --- a/agents/ten_packages/extension/deepgram_asr_python/manifest.json +++ b/agents/ten_packages/extension/deepgram_asr_python/manifest.json @@ -6,7 +6,7 @@ { "type": "system", "name": "ten_runtime_python", - "version": "0.2" + "version": "0.3" } ], "api": { From 6549b8ce15d0f1cf9ccb92300b3814dc7bb27151 Mon Sep 17 00:00:00 2001 From: sunshinexcode <24xinhui@163.com> Date: Tue, 29 Oct 2024 10:45:19 +0800 Subject: [PATCH 4/5] chore(): add graph va_deepgram_openai_cartesia --- agents/property.json | 553 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 553 insertions(+) diff --git a/agents/property.json b/agents/property.json index 0e176b27..e0a5d5c0 100644 --- a/agents/property.json +++ b/agents/property.json @@ -2941,6 +2941,559 @@ ] } ] + }, + { + "name": "va_openai_v2v_storage", + "auto_start": true, + "nodes": [ + { + "type": "extension", + "extension_group": "rtc", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "${env:AGORA_APP_ID}", + "token": "", + "channel": "ten_agent_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "subscribe_audio_sample_rate": 24000 + } + }, + { + "type": "extension", + "extension_group": "llm", + "addon": "openai_v2v_python", + "name": "openai_v2v_python", + "property": { + "api_key": "${env:OPENAI_REALTIME_API_KEY}", + "temperature": 0.9, + "model": "gpt-4o-realtime-preview", + "max_tokens": 2048, + "voice": "alloy", + "language": "en-US", + "server_vad": true, + "dump": true, + "history": 10, + "enable_storage": true + } + }, + { + "type": "extension", + "extension_group": "transcriber", + "addon": "message_collector", + "name": "message_collector" + }, + { + "type": "extension", + "extension_group": "tools", + "addon": "weatherapi_tool_python", + "name": "weatherapi_tool_python", + "property": { + "api_key": "${env:WEATHERAPI_API_KEY}" + } + }, + { + "type": "extension", + "extension_group": "tools", + "addon": "bingsearch_tool_python", + "name": "bingsearch_tool_python", + "property": { + "api_key": "${env:BING_API_KEY}" + } + }, + { + "type": "extension", + "extension_group": "context", + "addon": "tsdb_firestore", + "name": "tsdb_firestore", + "property": { + "credentials": { + "type": "service_account", + "project_id": "${env:FIRESTORE_PROJECT_ID}", + "private_key_id": "${env:FIRESTORE_PRIVATE_KEY_ID}", + "private_key": "${env:FIRESTORE_PRIVATE_KEY}", + "client_email": "${env:FIRESTORE_CLIENT_EMAIL}", + "client_id": "${env:FIRESTORE_CLIENT_ID}", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "${env:FIRESTORE_CERT_URL}", + "universe_domain": "googleapis.com" + }, + "channel_name": "ten_agent_test", + "collection_name": "llm_context" + } + } + ], + "connections": [ + { + "extension_group": "rtc", + "extension": "agora_rtc", + "audio_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "realtime", + "extension": "openai_v2v_python" + } + ] + } + ] + }, + { + "extension_group": "tools", + "extension": "weatherapi_tool_python", + "cmd": [ + { + "name": "tool_register", + "dest": [ + { + "extension_group": "realtime", + "extension": "openai_v2v_python" + } + ] + } + ] + }, + { + "extension_group": "tools", + "extension": "bingsearch_tool_python", + "cmd": [ + { + "name": "tool_register", + "dest": [ + { + "extension_group": "realtime", + "extension": "openai_v2v_python" + } + ] + } + ] + }, + { + "extension_group": "realtime", + "extension": "openai_v2v_python", + "audio_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "rtc", + "extension": "agora_rtc" + } + ] + } + ], + "data": [ + { + "name": "append", + "dest": [ + { + "extension_group": "context", + "extension": "tsdb_firestore" + } + ] + }, + { + "name": "text_data", + "dest": [ + { + "extension_group": "transcriber", + "extension": "message_collector" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "rtc", + "extension": "agora_rtc" + } + ] + }, + { + "name": "retrieve", + "dest": [ + { + "extension_group": "context", + "extension": "tsdb_firestore" + } + ] + }, + { + "name": "tool_call_get_current_weather", + "dest": [ + { + "extension_group": "tools", + "extension": "weatherapi_tool_python", + "msg_conversion": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "_ten.name", + "conversion_mode": "fixed_value", + "value": "tool_call" + } + ] + } + } + ] + }, + { + "name": "tool_call_get_past_weather", + "dest": [ + { + "extension_group": "tools", + "extension": "weatherapi_tool_python", + "msg_conversion": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "_ten.name", + "conversion_mode": "fixed_value", + "value": "tool_call" + } + ] + } + } + ] + }, + { + "name": "tool_call_get_future_weather", + "dest": [ + { + "extension_group": "tools", + "extension": "weatherapi_tool_python", + "msg_conversion": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "_ten.name", + "conversion_mode": "fixed_value", + "value": "tool_call" + } + ] + } + } + ] + }, + { + "name": "tool_call_bing_search", + "dest": [ + { + "extension_group": "tools", + "extension": "weatherapi_tool_python", + "msg_conversion": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "_ten.name", + "conversion_mode": "fixed_value", + "value": "tool_call" + } + ] + } + } + ] + } + ] + }, + { + "extension_group": "transcriber", + "extension": "message_collector", + "data": [ + { + "name": "data", + "dest": [ + { + "extension_group": "rtc", + "extension": "agora_rtc" + } + ] + } + ] + } + ] + }, + { + "name": "va_deepgram_openai_cartesia", + "auto_start": false, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "${env:AGORA_APP_ID}", + "token": "", + "channel": "ten_agent_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": false, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "${env:AZURE_STT_KEY}", + "agora_asr_vendor_region": "${env:AZURE_STT_REGION}", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "asr", + "addon": "deepgram_asr_python", + "name": "deepgram_asr", + "property": { + "api_key": "${env:DEEPGRAM_API_KEY}", + "language": "en-US", + "model": "nova-2", + "sample_rate": "16000" + } + }, + { + "type": "extension", + "extension_group": "chatgpt", + "addon": "openai_chatgpt", + "name": "openai_chatgpt", + "property": { + "base_url": "", + "api_key": "${env:OPENAI_API_KEY}", + "frequency_penalty": 0.9, + "model": "gpt-4o-mini", + "max_tokens": 512, + "prompt": "", + "proxy_url": "${env:OPENAI_PROXY_URL}", + "greeting": "TEN Agent connected. How can I help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "cartesia_tts", + "name": "cartesia_tts", + "property": { + "api_key": "${env:CARTESIA_API_KEY}", + "cartesia_version": "2024-06-10", + "model_id": "sonic-english", + "voice_id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94", + "sample_rate": "16000" + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension", + "extension_group": "transcriber", + "addon": "message_collector", + "name": "message_collector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "chatgpt" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "asr" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "audio_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "asr", + "extension": "deepgram_asr" + }, + { + "extension_group": "transcriber", + "extension": "message_collector" + } + ] + } + ], + "cmd": [ + { + "name": "on_user_joined", + "dest": [ + { + "extension_group": "asr", + "extension": "deepgram_asr" + } + ] + }, + { + "name": "on_user_left", + "dest": [ + { + "extension_group": "asr", + "extension": "deepgram_asr" + } + ] + }, + { + "name": "on_connection_failure", + "dest": [ + { + "extension_group": "asr", + "extension": "deepgram_asr" + } + ] + } + ] + }, + { + "extension_group": "asr", + "extension": "deepgram_asr", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "transcriber", + "extension": "message_collector" + } + ] + } + ] + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "cartesia_tts" + }, + { + "extension_group": "transcriber", + "extension": "message_collector" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "cartesia_tts" + } + ] + } + ] + }, + { + "extension_group": "transcriber", + "extension": "message_collector", + "data": [ + { + "name": "data", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "cartesia_tts", + "audio_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + } + ] } ] } From 649e7fe40c37cdd62f22b1d4f596ee5c86b3b978 Mon Sep 17 00:00:00 2001 From: sunshinexcode <24xinhui@163.com> Date: Tue, 29 Oct 2024 10:58:31 +0800 Subject: [PATCH 5/5] fix(): remove type extension_group --- agents/property.json | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/agents/property.json b/agents/property.json index e0a5d5c0..16c26867 100644 --- a/agents/property.json +++ b/agents/property.json @@ -3306,26 +3306,6 @@ "extension_group": "transcriber", "addon": "message_collector", "name": "message_collector" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "default" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "chatgpt" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "tts" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "asr" } ], "connections": [