From 616c55b7e9cfcec8ebbc2ec7f4d5ec02f7346904 Mon Sep 17 00:00:00 2001 From: sunshinexcode <24xinhui@163.com> Date: Wed, 27 Nov 2024 17:06:06 +0800 Subject: [PATCH] refactor(): refactor cartesia tts (#444) --- agents/examples/experimental/property.json | 10 +- .../extension/cartesia_tts/BUILD.gn | 19 ++ .../extension/cartesia_tts/README.md | 29 +++ .../extension/cartesia_tts/__init__.py | 12 +- .../extension/cartesia_tts/addon.py | 19 ++ .../extension/cartesia_tts/cartesia_tts.py | 42 ++++ .../cartesia_tts/cartesia_tts_addon.py | 24 --- .../cartesia_tts/cartesia_tts_extension.py | 197 ------------------ .../cartesia_tts/cartesia_wrapper.py | 112 ---------- .../extension/cartesia_tts/extension.py | 53 ++++- .../extension/cartesia_tts/log.py | 12 -- .../extension/cartesia_tts/manifest.json | 115 +++++----- .../extension/cartesia_tts/requirements.txt | 1 + .../cartesia_tts/tests/test_basic.py | 36 ++++ .../deepgram_asr_python/manifest.json | 166 +++++++-------- 15 files changed, 355 insertions(+), 492 deletions(-) create mode 100644 agents/ten_packages/extension/cartesia_tts/BUILD.gn create mode 100644 agents/ten_packages/extension/cartesia_tts/README.md create mode 100644 agents/ten_packages/extension/cartesia_tts/addon.py create mode 100644 agents/ten_packages/extension/cartesia_tts/cartesia_tts.py delete mode 100644 agents/ten_packages/extension/cartesia_tts/cartesia_tts_addon.py delete mode 100644 agents/ten_packages/extension/cartesia_tts/cartesia_tts_extension.py delete mode 100644 agents/ten_packages/extension/cartesia_tts/cartesia_wrapper.py delete mode 100644 agents/ten_packages/extension/cartesia_tts/log.py create mode 100644 agents/ten_packages/extension/cartesia_tts/requirements.txt create mode 100644 agents/ten_packages/extension/cartesia_tts/tests/test_basic.py diff --git a/agents/examples/experimental/property.json b/agents/examples/experimental/property.json index b532fea5..39dd74b6 100644 --- a/agents/examples/experimental/property.json +++ 
b/agents/examples/experimental/property.json @@ -3009,7 +3009,7 @@ "api_key": "${env:DEEPGRAM_API_KEY}", "language": "en-US", "model": "nova-2", - "sample_rate": "16000" + "sample_rate": 16000 } }, { @@ -3036,10 +3036,10 @@ "name": "cartesia_tts", "property": { "api_key": "${env:CARTESIA_API_KEY}", - "cartesia_version": "2024-06-10", + "language": "en", "model_id": "sonic-english", - "voice_id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94", - "sample_rate": "16000" + "sample_rate": 16000, + "voice_id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94" } }, { @@ -3253,7 +3253,7 @@ "api_key": "${env:DEEPGRAM_API_KEY}", "language": "en-US", "model": "nova-2", - "sample_rate": "16000" + "sample_rate": 16000 } }, { diff --git a/agents/ten_packages/extension/cartesia_tts/BUILD.gn b/agents/ten_packages/extension/cartesia_tts/BUILD.gn new file mode 100644 index 00000000..4e409853 --- /dev/null +++ b/agents/ten_packages/extension/cartesia_tts/BUILD.gn @@ -0,0 +1,19 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +import("//build/feature/ten_package.gni") + +ten_package("cartesia_tts") { + package_kind = "extension" + + resources = [ + "__init__.py", + "addon.py", + "extension.py", + "manifest.json", + "property.json", + "tests", + ] +} diff --git a/agents/ten_packages/extension/cartesia_tts/README.md b/agents/ten_packages/extension/cartesia_tts/README.md new file mode 100644 index 00000000..931f0029 --- /dev/null +++ b/agents/ten_packages/extension/cartesia_tts/README.md @@ -0,0 +1,29 @@ +# cartesia_tts + + + +## Features + + + +- xxx feature + +## API + +Refer to `api` definition in [manifest.json](manifest.json) and default values in [property.json](property.json).
+ + + +## Development + +### Build + + + +### Unit test + + + +## Misc + + diff --git a/agents/ten_packages/extension/cartesia_tts/__init__.py b/agents/ten_packages/extension/cartesia_tts/__init__.py index f6bb8f4c..72593ab2 100644 --- a/agents/ten_packages/extension/cartesia_tts/__init__.py +++ b/agents/ten_packages/extension/cartesia_tts/__init__.py @@ -1,6 +1,6 @@ -from . import cartesia_tts_addon -from .extension import EXTENSION_NAME -from .log import logger - - -logger.info(f"{EXTENSION_NAME} extension loaded") +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +from . import addon diff --git a/agents/ten_packages/extension/cartesia_tts/addon.py b/agents/ten_packages/extension/cartesia_tts/addon.py new file mode 100644 index 00000000..55d63435 --- /dev/null +++ b/agents/ten_packages/extension/cartesia_tts/addon.py @@ -0,0 +1,19 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +from ten import ( + Addon, + register_addon_as_extension, + TenEnv, +) + + +@register_addon_as_extension("cartesia_tts") +class CartesiaTTSExtensionAddon(Addon): + + def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None: + from .extension import CartesiaTTSExtension + ten_env.log_info("CartesiaTTSExtensionAddon on_create_instance") + ten_env.on_create_instance_done(CartesiaTTSExtension(name), context)
diff --git a/agents/ten_packages/extension/cartesia_tts/cartesia_tts.py b/agents/ten_packages/extension/cartesia_tts/cartesia_tts.py
new file mode 100644
index 00000000..4cc79b49
--- /dev/null
+++ b/agents/ten_packages/extension/cartesia_tts/cartesia_tts.py
@@ -0,0 +1,57 @@
+#
+#
+# Agora Real Time Engagement
+# Created by XinHui Li in 2024.
+# Copyright (c) 2024 Agora IO. All rights reserved.
+#
+#
+
+from dataclasses import dataclass
+from typing import AsyncIterator
+from cartesia import AsyncCartesia
+
+from ten_ai_base.config import BaseConfig
+
+
+@dataclass
+class CartesiaTTSConfig(BaseConfig):
+    """Settings for the Cartesia TTS client; every field has a default."""
+
+    api_key: str = ""
+    language: str = "en"
+    model_id: str = "sonic-english"
+    # Passed as ``timeout`` when the AsyncCartesia client is constructed.
+    request_timeout_seconds: int = 10
+    # Sample rate requested in the synthesis output format.
+    sample_rate: int = 16000
+    voice_id: str = "f9836c6e-a0bd-460e-9d3c-f7299fa60f94"
+
+
+class CartesiaTTS:
+    """Wrapper around ``AsyncCartesia`` that requests raw ``pcm_s16le`` audio."""
+
+    def __init__(self, config: CartesiaTTSConfig) -> None:
+        """Store *config* and build the client from its API key and timeout."""
+        self.config = config
+        self.client = AsyncCartesia(api_key=config.api_key, timeout=config.request_timeout_seconds)
+
+    async def text_to_speech_stream(self, text: str) -> AsyncIterator[dict]:
+        """Start a streaming SSE synthesis of *text* and return the chunk stream.
+
+        This is a coroutine because the extension awaits the call before
+        iterating the result with ``async for``; it reads each yielded chunk
+        through its ``"audio"`` key, so chunks are annotated as ``dict`` rather
+        than ``bytes``.
+        """
+        return await self.client.tts.sse(
+            language=self.config.language,
+            model_id=self.config.model_id,
+            output_format={
+                "container": "raw",
+                "encoding": "pcm_s16le",
+                "sample_rate": self.config.sample_rate,
+            },
+            stream=True,
+            transcript=text,
+            voice_id=self.config.voice_id,
+        )
\ No newline at end of file
diff --git a/agents/ten_packages/extension/cartesia_tts/cartesia_tts_addon.py b/agents/ten_packages/extension/cartesia_tts/cartesia_tts_addon.py deleted file mode 100644 index 1883fb5e..00000000 --- a/agents/ten_packages/extension/cartesia_tts/cartesia_tts_addon.py +++ /dev/null @@ -1,24 +0,0 @@ -# -# -# Agora Real Time Engagement -# Created by XinHui Li in 2024-07. -# Copyright (c) 2024 Agora IO. All rights reserved.
-# -# - -from ten import ( - Addon, - register_addon_as_extension, - TenEnv, -) -from .extension import EXTENSION_NAME - - -@register_addon_as_extension(EXTENSION_NAME) -class CartesiaTTSExtensionAddon(Addon): - def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None: - from .log import logger - logger.info("on_create_instance") - from .cartesia_tts_extension import CartesiaTTSExtension - - ten.on_create_instance_done(CartesiaTTSExtension(addon_name), context) diff --git a/agents/ten_packages/extension/cartesia_tts/cartesia_tts_extension.py b/agents/ten_packages/extension/cartesia_tts/cartesia_tts_extension.py deleted file mode 100644 index f18c9af3..00000000 --- a/agents/ten_packages/extension/cartesia_tts/cartesia_tts_extension.py +++ /dev/null @@ -1,197 +0,0 @@ -# cartesia_tts_extension.py - -import queue -import threading -from datetime import datetime -import asyncio -import re -from ten import ( - Extension, - TenEnv, - Cmd, - AudioFrameDataFmt, - AudioFrame, - Data, - StatusCode, - CmdResult, -) -from .cartesia_wrapper import CartesiaWrapper, CartesiaConfig, CartesiaError -from .log import logger - -class CartesiaCallback: - # Handles audio processing and interrupt checks - def __init__(self, ten: TenEnv, sample_rate: int, need_interrupt_callback): - self.ten = ten - self.sample_rate = sample_rate - self.need_interrupt_callback = need_interrupt_callback - self.ts = datetime.now() - - def set_input_ts(self, ts: datetime): - # Updates timestamp for the current input - self.ts = ts - - def need_interrupt(self) -> bool: - # Checks if current task should be interrupted - return self.need_interrupt_callback(self.ts) - - def create_audio_frame(self, audio_data): - # Creates an AudioFrame from raw audio data - frame = AudioFrame.create("pcm_frame") - frame.set_sample_rate(self.sample_rate) - frame.set_bytes_per_sample(2) # s16le is 2 bytes per sample - frame.set_number_of_channels(1) - frame.set_data_fmt(AudioFrameDataFmt.INTERLEAVE) - 
frame.set_samples_per_channel(len(audio_data) // 2) - frame.alloc_buf(len(audio_data)) - buff = frame.lock_buf() - buff[:] = audio_data - frame.unlock_buf(buff) - return frame - - def process_audio(self, audio_data): - # Processes audio data if not interrupted - if self.need_interrupt(): - return - audio_frame = self.create_audio_frame(audio_data) - self.ten.send_audio_frame(audio_frame) - -class CartesiaTTSExtension(Extension): - def __init__(self, name: str): - super().__init__(name) - self.cartesia = None - self.loop = None - self.queue = queue.Queue() - self.outdate_ts = datetime.now() - self.stopped = False - self.thread = None - self.callback = None - self.skip_patterns = [r'\bssml_\w+\b'] # List of patterns to skip - self.ten = None - - def on_start(self, ten: TenEnv) -> None: - self.ten = ten - try: - # Initialize Cartesia config and wrapper - cartesia_config = CartesiaConfig( - api_key=ten.get_property_string("api_key"), - model_id=ten.get_property_string("model_id"), - voice_id=ten.get_property_string("voice_id"), - sample_rate=int(ten.get_property_string("sample_rate")), - cartesia_version=ten.get_property_string("cartesia_version") - ) - self.cartesia = CartesiaWrapper(cartesia_config) - self.callback = CartesiaCallback(ten, cartesia_config.sample_rate, self.need_interrupt) - - # Set up asyncio event loop - self.loop = asyncio.new_event_loop() - asyncio.set_event_loop(self.loop) - - # Connect to Cartesia API - self.loop.run_until_complete(self.cartesia.connect()) - logger.info("Successfully connected to Cartesia API") - - # Start async handling thread - self.thread = threading.Thread(target=self.async_handle, args=[ten]) - self.thread.start() - - ten.on_start_done() - except Exception as e: - logger.error(f"Failed to start CartesiaTTSExtension: {e}") - ten.on_start_done() - - def on_stop(self, ten: TenEnv) -> None: - # Clean up resources and stop thread - self.stopped = True - self.flush() - self.queue.put(None) - if self.thread is not None: - 
self.thread.join() - self.thread = None - - if self.cartesia: - self.loop.run_until_complete(self.cartesia.close()) - if self.loop: - self.loop.close() - ten.on_stop_done() - - def need_interrupt(self, ts: datetime) -> bool: - # Check if task is outdated - return self.outdate_ts > ts - - def process_input_text(self, input_text: str) -> str: - # Process input text to remove parts that should be skipped - for pattern in self.skip_patterns: - input_text = re.sub(pattern, '', input_text, flags=re.IGNORECASE) - return input_text.strip() - - def create_pause_text(self, duration_ms: int) -> str: - # Create pause text - return f"PAUSE_{duration_ms}_MS" - - def on_data(self, ten: TenEnv, data: Data) -> None: - # Queue incoming text for processing - input_text = data.get_property_string("text") - if not input_text: - return - - # Handle the case of just a period or comma - if input_text.strip() in ['.', ',']: - pause_duration = 150 if input_text.strip() == '.' else 150 - pause_text = self.create_pause_text(pause_duration) - self.queue.put(("PAUSE", pause_text, datetime.now())) - return - - processed_text = self.process_input_text(input_text) - - if processed_text.strip(): - self.queue.put(("TEXT", processed_text, datetime.now())) - else: - logger.info("Processed text is empty. Skipping synthesis.") - - def async_handle(self, ten: TenEnv): - # Process queue items asynchronously - while not self.stopped: - try: - value = self.queue.get() - if value is None: - break - - item_type, content, ts = value - - self.callback.set_input_ts(ts) - - if self.callback.need_interrupt(): - logger.info("Drop outdated input") - continue - - try: - audio_data = self.loop.run_until_complete(self.cartesia.synthesize(content)) - self.callback.process_audio(audio_data) - except CartesiaError as e: - logger.error(f"Failed to synthesize: {str(e)}. 
Moving to next item.") - # Optionally, you could add some fallback behavior here, like playing an error sound - - except Exception as e: - logger.exception(f"Error in async_handle: {e}") - # Continue processing the next item instead of breaking the loop - - def on_cmd(self, ten: TenEnv, cmd: Cmd) -> None: - # Handle incoming commands - cmd_name = cmd.get_name() - - if cmd_name == "flush": - self.outdate_ts = datetime.now() - self.flush() - cmd_result = CmdResult.create(StatusCode.OK) - cmd_result.set_property_string("detail", "Flush command executed") - else: - logger.warning(f"Unknown command received: {cmd_name}") - cmd_result = CmdResult.create(StatusCode.ERROR) - cmd_result.set_property_string("detail", f"Unknown command: {cmd_name}") - - ten.return_result(cmd_result, cmd) - - def flush(self): - # Clear the queue - while not self.queue.empty(): - self.queue.get() diff --git a/agents/ten_packages/extension/cartesia_tts/cartesia_wrapper.py b/agents/ten_packages/extension/cartesia_tts/cartesia_wrapper.py deleted file mode 100644 index 37a3d7c4..00000000 --- a/agents/ten_packages/extension/cartesia_tts/cartesia_wrapper.py +++ /dev/null @@ -1,112 +0,0 @@ -# cartesia_wrapper.py - -import asyncio -import websockets -import json -import base64 -import logging -from urllib.parse import urlparse - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -class CartesiaError(Exception): - """Custom exception class for Cartesia-related errors.""" - pass - -class CartesiaConfig: - # Configuration class for Cartesia API - def __init__(self, api_key, model_id, voice_id, sample_rate, cartesia_version): - self.api_key = api_key - self.model_id = model_id - self.voice_id = voice_id - self.sample_rate = sample_rate - self.cartesia_version = cartesia_version - -class CartesiaWrapper: - # Wrapper class for Cartesia API interactions - def __init__(self, config: CartesiaConfig): - self.config = config - self.websocket = None - self.context_id = 0 - - async def 
connect(self): - # Establish WebSocket connection to Cartesia API - ws_url = f"wss://api.cartesia.ai/tts/websocket?api_key={self.config.api_key}&cartesia_version={self.config.cartesia_version}" - try: - self.websocket = await websockets.connect(ws_url) - logger.info("Connected to Cartesia WebSocket") - except Exception as e: - logger.error(f"Failed to connect to Cartesia API: {str(e)}") - raise CartesiaError(f"Connection failed: {str(e)}") - - async def synthesize(self, text: str): - # Synthesize speech from text using Cartesia API - if not self.websocket: - await self.connect() - - if text.startswith("PAUSE_"): - # Handle custom pause marker - try: - duration_ms = int(text.split("_")[1]) - return self.generate_silence(duration_ms) - except (IndexError, ValueError): - logger.error(f"Invalid pause format: {text}") - raise CartesiaError(f"Invalid pause format: {text}") - - self.context_id += 1 - request = { - "context_id": f"context_{self.context_id}", - "model_id": self.config.model_id, - "transcript": text, - "voice": {"mode": "id", "id": self.config.voice_id}, - "output_format": { - "container": "raw", - "encoding": "pcm_s16le", - "sample_rate": int(self.config.sample_rate) - }, - "language": "en", - "add_timestamps": False - } - - try: - # Send synthesis request - await self.websocket.send(json.dumps(request)) - - # Receive and process audio chunks - audio_data = bytearray() - while True: - response = await self.websocket.recv() - message = json.loads(response) - - if message['type'] == 'chunk': - chunk_data = base64.b64decode(message['data']) - audio_data.extend(chunk_data) - elif message['type'] == 'done': - break - elif message['type'] == 'error': - raise CartesiaError(f"Synthesis error: {message.get('error', 'Unknown error')}") - else: - logger.warning(f"Unknown message type: {message['type']}") - - return audio_data - except websockets.exceptions.ConnectionClosed: - # Handle connection errors and retry - logger.error("WebSocket connection closed 
unexpectedly. Attempting to reconnect...") - await self.connect() - return await self.synthesize(text) # Retry the synthesis after reconnecting - except Exception as e: - logger.error(f"Error during synthesis: {str(e)}") - raise CartesiaError(f"Synthesis failed: {str(e)}") - - def generate_silence(self, duration_ms: int) -> bytes: - # Generate silent audio data - sample_rate = self.config.sample_rate - num_samples = int(sample_rate * duration_ms / 1000) - return b"\x00" * (num_samples * 2) # Assuming 16-bit audio - - async def close(self): - # Close WebSocket connection - if self.websocket: - await self.websocket.close() - logger.info("Closed WebSocket connection to Cartesia API")
diff --git a/agents/ten_packages/extension/cartesia_tts/extension.py b/agents/ten_packages/extension/cartesia_tts/extension.py
index 4883c11c..5729cfe6 100644
--- a/agents/ten_packages/extension/cartesia_tts/extension.py
+++ b/agents/ten_packages/extension/cartesia_tts/extension.py
@@ -1 +1,76 @@
-EXTENSION_NAME = "cartesia_tts"
+#
+# This file is part of TEN Framework, an open source project.
+# Licensed under the Apache License, Version 2.0.
+# See the LICENSE file for more information.
+#
+import traceback
+
+from .cartesia_tts import CartesiaTTS, CartesiaTTSConfig
+from ten import (
+    AsyncTenEnv,
+)
+from ten_ai_base.tts import AsyncTTSBaseExtension
+
+
+class CartesiaTTSExtension(AsyncTTSBaseExtension):
+    """Cartesia-backed TTS extension built on ``AsyncTTSBaseExtension``."""
+
+    def __init__(self, name: str) -> None:
+        """Create the extension; config and client stay ``None`` until ``on_start``."""
+        super().__init__(name)
+        self.config = None
+        self.client = None
+
+    async def on_init(self, ten_env: AsyncTenEnv) -> None:
+        """Run the base-class init hook and log the lifecycle step."""
+        await super().on_init(ten_env)
+        ten_env.log_debug("on_init")
+
+    async def on_start(self, ten_env: AsyncTenEnv) -> None:
+        """Load the configuration and build the Cartesia client.
+
+        Failures (including a missing ``api_key``) are logged rather than
+        raised, which leaves ``self.client`` as ``None``.
+        """
+        try:
+            await super().on_start(ten_env)
+            ten_env.log_debug("on_start")
+            self.config = CartesiaTTSConfig.create(ten_env=ten_env)
+
+            if not self.config.api_key:
+                raise ValueError("api_key is required")
+
+            self.client = CartesiaTTS(self.config)
+        except Exception:
+            # Deliberately best-effort: report the traceback and keep the
+            # extension alive; on_request_tts checks for the missing client.
+            ten_env.log_error(f"on_start failed: {traceback.format_exc()}")
+
+    async def on_stop(self, ten_env: AsyncTenEnv) -> None:
+        """Run the base-class stop hook and log the lifecycle step."""
+        await super().on_stop(ten_env)
+        ten_env.log_debug("on_stop")
+
+    async def on_deinit(self, ten_env: AsyncTenEnv) -> None:
+        """Run the base-class deinit hook and log the lifecycle step."""
+        await super().on_deinit(ten_env)
+        ten_env.log_debug("on_deinit")
+
+    async def on_request_tts(self, ten_env: AsyncTenEnv, input_text: str, end_of_segment: bool) -> None:
+        """Synthesize *input_text* and forward every audio chunk as it arrives.
+
+        The request is skipped with an error log when ``on_start`` failed to
+        create the client, instead of failing on ``None``.
+        """
+        if self.client is None:
+            ten_env.log_error("on_request_tts skipped: Cartesia client is not initialized")
+            return
+
+        audio_stream = await self.client.text_to_speech_stream(input_text)
+
+        async for audio_data in audio_stream:
+            self.send_audio_out(ten_env, audio_data["audio"])
+
+    async def on_cancel_tts(self, ten_env: AsyncTenEnv) -> None:
+        """Delegate cancellation to the base class."""
+        return await super().on_cancel_tts(ten_env)
\ No newline at end of file
diff --git a/agents/ten_packages/extension/cartesia_tts/log.py b/agents/ten_packages/extension/cartesia_tts/log.py deleted file mode 100644 index fad21710..00000000 --- a/agents/ten_packages/extension/cartesia_tts/log.py +++ /dev/null @@ -1,12 +0,0 @@ -import logging -from .extension import EXTENSION_NAME - -logger = logging.getLogger(EXTENSION_NAME) -logger.setLevel(logging.INFO) - -formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(process)d -
[%(filename)s:%(lineno)d] - %(message)s") - -console_handler = logging.StreamHandler() -console_handler.setFormatter(formatter) - -logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/cartesia_tts/manifest.json b/agents/ten_packages/extension/cartesia_tts/manifest.json index 2a5c3578..85912f14 100644 --- a/agents/ten_packages/extension/cartesia_tts/manifest.json +++ b/agents/ten_packages/extension/cartesia_tts/manifest.json @@ -1,56 +1,67 @@ { - "type": "extension", - "name": "cartesia_tts", - "version": "0.1.0", - "dependencies": [ - { - "type": "system", - "name": "ten_runtime_python", - "version": "0.4" + "type": "extension", + "name": "cartesia_tts", + "version": "0.4.2", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_python", + "version": "0.4.2" + } + ], + "package": { + "include": [ + "manifest.json", + "property.json", + "BUILD.gn", + "**.tent", + "**.py", + "README.md", + "tests/**" + ] + }, + "api": { + "property": { + "api_key": { + "type": "string" + }, + "language": { + "type": "string" + }, + "model_id": { + "type": "string" + }, + "sample_rate": { + "type": "int64" + }, + "voice_id": { + "type": "string" + } + }, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + } } + } ], - "api": { - "property": { - "api_key": { - "type": "string" - }, - "cartesia_version": { - "type": "string" - }, - "model_id": { - "type": "string" - }, - "sample_rate": { - "type": "string" - }, - "voice_id": { - "type": "string" - } - }, - "data_in": [ - { - "name": "text_data", - "property": { - "text": { - "type": "string" - } - } - } - ], - "cmd_in": [ - { - "name": "flush" - } - ], - "cmd_out": [ - { - "name": "flush" - } - ], - "audio_frame_out": [ - { - "name": "pcm_frame" - } - ] - } + "cmd_in": [ + { + "name": "flush" + } + ], + "cmd_out": [ + { + "name": "flush" + } + ], + "audio_frame_out": [ + { + "name": "pcm_frame" + } + ] + } } \ No newline at end of file diff --git 
a/agents/ten_packages/extension/cartesia_tts/requirements.txt b/agents/ten_packages/extension/cartesia_tts/requirements.txt new file mode 100644 index 00000000..59c3d54e --- /dev/null +++ b/agents/ten_packages/extension/cartesia_tts/requirements.txt @@ -0,0 +1 @@ +cartesia \ No newline at end of file
diff --git a/agents/ten_packages/extension/cartesia_tts/tests/test_basic.py b/agents/ten_packages/extension/cartesia_tts/tests/test_basic.py
new file mode 100644
index 00000000..c3755f44
--- /dev/null
+++ b/agents/ten_packages/extension/cartesia_tts/tests/test_basic.py
@@ -0,0 +1,46 @@
+#
+# Copyright © 2024 Agora
+# This file is part of TEN Framework, an open source project.
+# Licensed under the Apache License, Version 2.0, with certain conditions.
+# Refer to the "LICENSE" file in the root directory for more information.
+#
+from pathlib import Path
+from ten import ExtensionTester, TenEnvTester, Cmd, CmdResult, StatusCode
+
+
+class ExtensionTesterBasic(ExtensionTester):
+    """Tester that sends one "hello_world" command and stops once it gets an OK result."""
+
+    def check_hello(self, ten_env: TenEnvTester, result: CmdResult):
+        """Result callback: print the status and stop the test when it is OK."""
+        statusCode = result.get_status_code()
+        print("receive hello_world, status:" + str(statusCode))
+
+        # Any other status leaves the test running; nothing here stops it.
+        if statusCode == StatusCode.OK:
+            ten_env.stop_test()
+
+    def on_start(self, ten_env: TenEnvTester) -> None:
+        """Send the "hello_world" command, then report that start-up is done."""
+        new_cmd = Cmd.create("hello_world")
+
+        print("send hello_world")
+        ten_env.send_cmd(
+            new_cmd,
+            lambda ten_env, result: self.check_hello(ten_env, result),
+        )
+
+        print("tester on_start_done")
+        ten_env.on_start_done()
+
+
+def test_basic():
+    """Run ExtensionTesterBasic in single-extension test mode."""
+    tester = ExtensionTesterBasic()
+    # The addon base dir is the parent of this file's directory.
+    tester.add_addon_base_dir(str(Path(__file__).resolve().parent.parent))
+    # NOTE(review): "default_async_extension_python" looks like a template
+    # leftover; the addon in this package registers as "cartesia_tts", and
+    # "hello_world" is not listed in its manifest `cmd_in` — confirm.
+    tester.set_test_mode_single("default_async_extension_python")
+    tester.run()
diff --git a/agents/ten_packages/extension/deepgram_asr_python/manifest.json b/agents/ten_packages/extension/deepgram_asr_python/manifest.json index e7914dd6..0ba17d06 100644 --- a/agents/ten_packages/extension/deepgram_asr_python/manifest.json +++
b/agents/ten_packages/extension/deepgram_asr_python/manifest.json @@ -1,88 +1,88 @@ { - "type": "extension", - "name": "deepgram_asr_python", - "version": "0.1.0", - "dependencies": [ - { - "type": "system", - "name": "ten_runtime_python", - "version": "0.4" + "type": "extension", + "name": "deepgram_asr_python", + "version": "0.1.0", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_python", + "version": "0.4" + } + ], + "api": { + "property": { + "api_key": { + "type": "string" + }, + "model": { + "type": "string" + }, + "language": { + "type": "string" + }, + "sample_rate": { + "type": "int64" + } + }, + "audio_frame_in": [ + { + "name": "pcm_frame", + "property": {} + } + ], + "cmd_in": [ + { + "name": "on_user_joined", + "property": { + "user_id": { + "type": "string" + } + } + }, + { + "name": "on_user_left", + "property": { + "user_id": { + "type": "string" + } } + }, + { + "name": "on_connection_failure", + "property": { + "error": { + "type": "string" + } + } + } ], - "api": { + "data_out": [ + { + "name": "text_data", "property": { - "api_key": { - "type": "string" - }, - "model": { - "type": "string" - }, - "language": { - "type": "string" - }, - "sample_rate": { - "type": "string" - } - }, - "audio_frame_in": [ - { - "name": "pcm_frame", - "property": {} - } - ], - "cmd_in": [ - { - "name": "on_user_joined", - "property": { - "user_id": { - "type": "string" - } - } - }, - { - "name": "on_user_left", - "property": { - "user_id": { - "type": "string" - } - } - }, - { - "name": "on_connection_failure", - "property": { - "error": { - "type": "string" - } - } - } - ], - "data_out": [ - { - "name": "text_data", - "property": { - "time": { - "type": "int64" - }, - "duration_ms": { - "type": "int64" - }, - "language": { - "type": "string" - }, - "text": { - "type": "string" - }, - "is_final": { - "type": "bool" - }, - "stream_id": { - "type": "uint32" - }, - "end_of_segment": { - "type": "bool" - } - } - } - ] - } + "time": { + "type": "int64" 
+ }, + "duration_ms": { + "type": "int64" + }, + "language": { + "type": "string" + }, + "text": { + "type": "string" + }, + "is_final": { + "type": "bool" + }, + "stream_id": { + "type": "uint32" + }, + "end_of_segment": { + "type": "bool" + } + } + } + ] + } } \ No newline at end of file