From 700cbd1b7c59bdaee33f1615c549bdf3e390ae27 Mon Sep 17 00:00:00 2001 From: TomasLiu Date: Wed, 2 Oct 2024 20:27:27 +0800 Subject: [PATCH 1/8] init tool --- .../extension/openai_v2v_python/client.py | 7 +- .../extension/openai_v2v_python/extension.py | 54 ++++++++++- .../extension/openai_v2v_python/tools.py | 91 +++++++++++++++++++ 3 files changed, 149 insertions(+), 3 deletions(-) create mode 100644 agents/ten_packages/extension/openai_v2v_python/tools.py diff --git a/agents/ten_packages/extension/openai_v2v_python/client.py b/agents/ten_packages/extension/openai_v2v_python/client.py index 471ccede..df6ebcd4 100644 --- a/agents/ten_packages/extension/openai_v2v_python/client.py +++ b/agents/ten_packages/extension/openai_v2v_python/client.py @@ -12,6 +12,11 @@ DEFAULT_MODEL = "gpt-4o-realtime-preview" +DEFAULT_INSTRUCTION = ''' +You are a helpful voice assistant, user can see transcription through the chat box. Your name is TEN Agent. You are built by TEN Framework which is a powerful realtime multimodal agent framework. User's input will mainly be {language}, and your response must be {language}. +{tools} +''' + def smart_str(s: str, max_field_len: int = 128) -> str: """parse string as json, truncate data field to 128 characters, reserialize""" try: @@ -42,7 +47,7 @@ def __init__( verbose: bool = False, model: str=DEFAULT_MODEL, language: str = "en-US", - system_message: str="You are a helpful assistant, you are professional but lively and friendly. User's input will mainly be {language}, and your response must be {language}.", + system_message: str=DEFAULT_INSTRUCTION, temperature: float =0.5, max_tokens: int =1024, voice: messages.Voices = messages.Voices.Alloy, diff --git a/agents/ten_packages/extension/openai_v2v_python/extension.py b/agents/ten_packages/extension/openai_v2v_python/extension.py index 3ba1bc0e..b53f73a8 100644 --- a/agents/ten_packages/extension/openai_v2v_python/extension.py +++ b/agents/ten_packages/extension/openai_v2v_python/extension.py @@ -24,6 +24,7 @@ from .log import logger from .client import RealtimeApiClient, RealtimeApiConfig from .messages import * +from .tools import ToolRegistry # properties PROPERTY_API_KEY = "api_key" # Required @@ -60,7 +61,7 @@ def __init__(self, name: str): self.ctx: dict = {} # audo related - self.sample_rate: int = 24000 + self.sample_rate: int = 16000 self.out_audio_buff: bytearray = b'' self.audio_len_threshold: int = 10240 self.transcript: str = '' @@ -69,6 +70,7 @@ def __init__(self, name: str): self.remote_stream_id: int = 0 self.channel_name: str = "" self.dump: bool = False + self.registry = ToolRegistry() def on_start(self, ten_env: TenEnv) -> None: logger.info("OpenAIV2VExtension on_start") @@ -83,6 +85,8 @@ def start_event_loop(loop): self.thread = threading.Thread( target=start_event_loop, args=(self.loop,)) self.thread.start() + + self._register_local_tools() asyncio.run_coroutine_threadsafe(self._init_client(), self.loop) @@ -245,6 +249,26 @@ def get_time_ms() -> int: relative_start_ms = get_time_ms() - message.audio_end_ms logger.info( f"On server stop listening, {message.audio_end_ms}, relative {relative_start_ms}") + case ResponseOutputItemDone(): + match message.item: + case FunctionCallItem(): #TODO + tool_call_id = message.item.call_id + response = {"order_success": True} + response_str = json.dumps(response) + + config_msg = messages.SessionUpdate( + session=messages.SessionUpdateParams(tool_choice="none") + ) + await client.send_message(config_msg) + tool_response = messages.ItemCreate( + item=messages.FunctionCallOutputItemParam( + call_id=tool_call_id, + output=response_str, + ) + ) + + await client.send_message(tool_response) + await client.send_message(messages.ResponseCreate()) case ErrorMessage(): logger.error( f"Error message received: {message.error}") @@ -354,7 +378,9 @@ def _update_session(self) -> SessionUpdate: model=self.config.model, voice=self.config.voice, input_audio_transcription=InputAudioTranscription( - model="whisper-1") + model="whisper-1"), + tool_choice="auto", + tools=self.registry.get_tools() )) def _update_conversation(self) -> UpdateConversationConfig: @@ -420,3 +446,27 @@ def _dump_audio_if_need(self, buf: bytearray, role: Role) -> None: with open("{}_{}.pcm".format(role, self.channel_name), "ab") as dump_file: dump_file.write(buf) + + def _register_local_tools(self) -> None: + self.registry.register( + name="weather", description="This is a weather check func, if the user is asking about the weather. you need to summarize location and time information from the context as parameters. if the information is lack, please ask for more detail before calling.", + callback=self.weather_check, + parameters={ + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The location or region for the weather check.", + }, + "datetime": { + "type": "string", + "description": "The date and time for the weather check. The datetime should use format like 2024-10-01T16:42:00.", + } + }, + "required": ["location"], + }) + self.ctx["tools"] = self.registry.to_prompt() + + # Tools + def weather_check(self, location:str = "", datetime:str = ""): + logger.info(f"on weather check {location}, {datetime}") \ No newline at end of file diff --git a/agents/ten_packages/extension/openai_v2v_python/tools.py b/agents/ten_packages/extension/openai_v2v_python/tools.py new file mode 100644 index 00000000..0da9fe7b --- /dev/null +++ b/agents/ten_packages/extension/openai_v2v_python/tools.py @@ -0,0 +1,91 @@ +import json +import copy +from typing import Dict, Any + +from log import logger + +class ToolRegistry: + tools: Dict[str, dict[str, Any]] = {} + def register(self, name:str, description: str, callback, parameters: Any = None) -> None: + info = { + "type": "function", + "name": name, + "description": description, + "callback": callback + } + if parameters: + info["parameters"] = parameters + self.tools[name] = info + logger.info(f"register tool {name} {description}") + + def to_prompt(self) -> str: + prompt = "" + if self.tools: + prompt = "You have several tools that you can get help from:\n" + for name, t in self.tools.items(): + desc = t["description"] + prompt += f"- ***{name}***: {desc}" + return prompt + + def unregister(self, name:str) -> None: + if name in self.tools: + del self.tools[name] + logger.info(f"unregister tool {name}") + + def get_tools(self) -> list[dict[str, Any]]: + result = [] + for _, t in self.tools.items(): + info = copy.copy(t) + del info["callback"] + result.append(info) + return result + + def on_func_call(self, name, args, callback): + try: + if name in self.tools: + t = self.tools[name] + # FIXME add args check + if t.get("callback"): + result = t["callback"](**args) + callback(result) + else: + logger.warning(f"Failed to find func {name}") + except: + logger.exception(f"Failed to call func {name}") + # TODO What to do if func call is dead + callback(None) + +if __name__ == "__main__": + r = ToolRegistry() + + def weather_check(location:str = "", datetime:str = ""): + logger.info(f"on weather check {location}, {datetime}") + + def on_tool_completion(result: Any): + logger.info(f"on tool completion {result}") + + r.register( + name="weather", description="This is a weather check func, if the user is asking about the weather. you need to summarize location and time information from the context as parameters. if the information is lack, please ask for more detail before calling.", + callback=weather_check, + parameters={ + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The location or region for the weather check.", + }, + "datetime": { + "type": "string", + "description": "The date and time for the weather check. The datetime should use format like 2024-10-01T16:42:00.", + } + }, + "required": ["location"], + }) + print(r.to_prompt()) + print(r.get_tools()) + print(r.on_func_call("weather", {"location":"LA", "datetime":"2024-10-01T16:43:01"}, on_tool_completion)) + r.unregister("weather") + print(r.to_prompt()) + print(r.get_tools()) + print(r.on_func_call("weather", {"location":"LA", "datetime":"2024-10-01T16:43:01"}, on_tool_completion)) + \ No newline at end of file From a729e98e82c2c7d82268793822eb0f09b46a44fd Mon Sep 17 00:00:00 2001 From: TomasLiu Date: Sun, 6 Oct 2024 12:21:08 +0800 Subject: [PATCH 2/8] fix python exception --- .../extension/openai_v2v_python/extension.py | 51 ++++++++++--------- .../weatherapi_tool_python/extension.py | 26 ++++++---- 2 files changed, 45 insertions(+), 32 deletions(-) diff --git a/agents/ten_packages/extension/openai_v2v_python/extension.py b/agents/ten_packages/extension/openai_v2v_python/extension.py index e4c3dc38..10bb46bc 100644 --- a/agents/ten_packages/extension/openai_v2v_python/extension.py +++ b/agents/ten_packages/extension/openai_v2v_python/extension.py @@ -10,6 +10,7 @@ import base64 from datetime import datetime from typing import Awaitable +from functools import partial from ten import ( AudioFrame, @@ -75,7 +76,7 @@ def __init__(self, name: str): self.ctx: dict = {} # audo related - self.sample_rate: int = 16000 + self.sample_rate: int = 24000 self.out_audio_buff: bytearray = b'' self.audio_len_threshold: int = 10240 self.transcript: str = '' @@ -121,7 +122,7 @@ def on_stop(self, ten_env: TenEnv) -> None: def on_audio_frame(self, ten_env: TenEnv, audio_frame: AudioFrame) -> None: try: stream_id = audio_frame.get_property_int("stream_id") - logger.debug(f"on_audio_frame {stream_id}") + #logger.debug(f"on_audio_frame {stream_id}") if self.channel_name == "": self.channel_name = audio_frame.get_property_string("channel") @@ -148,7 +149,7 @@ def on_cmd(self, ten_env: TenEnv, cmd: Cmd) -> None: cmd_name = cmd.get_name() if cmd_name == CMD_TOOL_REGISTER: - self._on_tool_register(cmd) + self._on_tool_register(ten_env, cmd) cmd_result = CmdResult.create(StatusCode.OK) ten_env.return_result(cmd_result, cmd) @@ -247,7 +248,7 @@ def get_time_ms() -> int: logger.info(f"Output item done {message.item}") case ResponseOutputItemAdded(): logger.info( - f"Output item added {message.output_index} {message.item.id}") + f"Output item added {message.output_index} {message.item}") case ResponseAudioDelta(): if message.response_id in flushed: logger.warning( @@ -278,15 +279,12 @@ def get_time_ms() -> int: relative_start_ms = get_time_ms() - message.audio_end_ms logger.info( f"On server stop listening, {message.audio_end_ms}, relative {relative_start_ms}") - case ResponseOutputItemDone(): - item = message.item - match item: - case FunctionCallItemParam(): - tool_call_id = item.call_id - name = item.name - arguments = item.arguments - logger.info(f"need to call func {name}") - await self.registry.on_func_call(tool_call_id, name, arguments, self._on_tool_output) # TODO rebuild this into async, or it will block the thread + case ResponseFunctionCallArgumentsDone(): + tool_call_id = message.call_id + name = message.name + arguments = message.arguments + logger.info(f"need to call func {name}") + await self.registry.on_func_call(tool_call_id, name, arguments, self._on_tool_output) # TODO rebuild this into async, or it will block the thread case ErrorMessage(): logger.error( f"Error message received: {message.error}") @@ -479,15 +477,16 @@ def _dump_audio_if_need(self, buf: bytearray, role: Role) -> None: def _register_local_tools(self) -> None: self.ctx["tools"] = self.registry.to_prompt() - def _on_tool_register(self, cmd: Cmd): + def _on_tool_register(self, ten_env: TenEnv, cmd: Cmd): try: name = cmd.get_property_string(TOOL_REGISTER_PROPERTY_NAME) description = cmd.get_property_string(TOOL_REGISTER_PROPERTY_DESCRIPTON) pstr = cmd.get_property_string(TOOL_REGISTER_PROPERTY_PARAMETERS) parameters = json.loads(pstr) + p = partial(self._remote_tool_call, ten_env) self.registry.register( name=name, description=description, - callback=self._remote_tool_call, + callback=p, parameters=parameters) logger.info(f"on tool register {name} {description}") self.on_config_changed() @@ -495,22 +494,28 @@ def _on_tool_register(self, cmd: Cmd): logger.exception(f"Failed to register") async def _remote_tool_call(self, ten_env: TenEnv, name:str, args: str, callback: Awaitable): + logger.info(f"_remote_tool_call {name} {args}") c = Cmd.create(CMD_TOOL_CALL) c.set_property_string(CMD_PROPERTY_NAME, name) c.set_property_string(CMD_PROPERTY_ARGS, args) ten_env.send_cmd(c, lambda ten, result: asyncio.run_coroutine_threadsafe( - callback(result.get_property_string("response"), self.loop))) + callback(result.get_property_string("response")), self.loop)) + logger.info(f"_remote_tool_call finish {name} {args}") async def _on_tool_output(self, tool_call_id:str, result: str): - tool_response = ItemCreate( - item=FunctionCallOutputItemParam( - call_id=tool_call_id, - output=result, + logger.info(f"_on_tool_output {tool_call_id} {result}") + try: + tool_response = ItemCreate( + item=FunctionCallOutputItemParam( + call_id=tool_call_id, + output=result, + ) ) - ) - await self.conn.send_message(tool_response) - await self.conn.send_request(ResponseCreate()) + await self.conn.send_request(tool_response) + await self.conn.send_request(ResponseCreate()) + except: + logger.exception("Failed to handle tool output") def _greeting_text(self) -> str: text = "Hi, there." diff --git a/agents/ten_packages/extension/weatherapi_tool_python/extension.py b/agents/ten_packages/extension/weatherapi_tool_python/extension.py index 69fc2c19..d9485062 100644 --- a/agents/ten_packages/extension/weatherapi_tool_python/extension.py +++ b/agents/ten_packages/extension/weatherapi_tool_python/extension.py @@ -67,11 +67,11 @@ def on_start(self, ten_env: TenEnv) -> None: return # Register func - #c = Cmd.create(CMD_TOOL_REGISTER) - #c.set_property_string(TOOL_REGISTER_PROPERTY_NAME, TOOL_NAME) - #c.set_property_string(TOOL_REGISTER_PROPERTY_DESCRIPTON, TOOL_DESCRIPTION) - #c.set_property_string(TOOL_REGISTER_PROPERTY_PARAMETERS, json.dumps(TOOL_PARAMETERS)) - #ten_env.send_cmd(c, lambda ten, result: logger.info(f"register done, {result}")) + c = Cmd.create(CMD_TOOL_REGISTER) + c.set_property_string(TOOL_REGISTER_PROPERTY_NAME, TOOL_NAME) + c.set_property_string(TOOL_REGISTER_PROPERTY_DESCRIPTON, TOOL_DESCRIPTION) + c.set_property_string(TOOL_REGISTER_PROPERTY_PARAMETERS, json.dumps(TOOL_PARAMETERS)) + ten_env.send_cmd(c, lambda ten, result: logger.info(f"register done, {result}")) ten_env.on_start_done() @@ -86,7 +86,7 @@ def on_deinit(self, ten_env: TenEnv) -> None: def on_cmd(self, ten_env: TenEnv, cmd: Cmd) -> None: cmd_name = cmd.get_name() - logger.info("on_cmd name {}".format(cmd_name)) + logger.info(f"on_cmd name {cmd_name} {cmd.to_json()}") try: name = cmd.get_property_string(CMD_PROPERTY_NAME) @@ -95,23 +95,31 @@ def on_cmd(self, ten_env: TenEnv, cmd: Cmd) -> None: args = cmd.get_property_string(CMD_PROPERTY_ARGS) arg_dict = json.loads(args) if "location" in arg_dict: + logger.info(f"before get current weather {name}") resp = self._get_current_weather(arg_dict["location"]) + logger.info(f"after get current weather {resp}") cmd_result = CmdResult.create(StatusCode.OK) cmd_result.set_property_string("response", json.dumps(resp)) ten_env.return_result(cmd_result, cmd) + return else: + logger.error(f"no location in args {args}") cmd_result = CmdResult.create(StatusCode.ERROR) ten_env.return_result(cmd_result, cmd) + return except: - logger.exception(f"Failed to get weather") + logger.exception("Failed to get weather") cmd_result = CmdResult.create(StatusCode.ERROR) ten_env.return_result(cmd_result, cmd) + return + else: + logger.error(f"unknown tool name {name}") except: - logger.exception(f"Failed to get tool name") + logger.exception("Failed to get tool name") cmd_result = CmdResult.create(StatusCode.ERROR) ten_env.return_result(cmd_result, cmd) + return - cmd_result = CmdResult.create(StatusCode.OK) ten_env.return_result(cmd_result, cmd) From 23dc06d114bf5a1c7dbb8fb5caaaebf2c7dcfc02 Mon Sep 17 00:00:00 2001 From: zhangjie02 Date: Sun, 6 Oct 2024 04:35:44 +0000 Subject: [PATCH 3/8] feat: add tool in v2v --- agents/property.json | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/agents/property.json b/agents/property.json index b26a50f8..993b717a 100644 --- a/agents/property.json +++ b/agents/property.json @@ -2201,6 +2201,15 @@ "extension_group": "transcriber", "addon": "message_collector", "name": "message_collector" + }, + { + "type": "extension", + "extension_group": "tools", + "addon": "weatherapi_tool_python", + "name": "weatherapi_tool_python", + "property": { + "api_key": "${env:WEATHERAPI_API_KEY}" + } } ], "connections": [ @@ -2219,6 +2228,21 @@ } ] }, + { + "extension_group": "tools", + "extension": "weatherapi_tool_python", + "cmd": [ + { + "name": "tool_register", + "dest": [ + { + "extension_group": "llm", + "extension": "openai_v2v_python" + } + ] + } + ] + }, { "extension_group": "llm", "extension": "openai_v2v_python", @@ -2253,6 +2277,15 @@ "extension": "agora_rtc" } ] + }, + { + "name": "tool_call", + "dest": [ + { + "extension_group": "tools", + "extension": "weatherapi_tool_python" + } + ] } ] }, From e8d6240d90fbf7d6c1a2391785f983df51c3d8e2 Mon Sep 17 00:00:00 2001 From: TomasLiu Date: Wed, 2 Oct 2024 20:27:27 +0800 Subject: [PATCH 4/8] init tool --- .../extension/openai_v2v_python/client.py | 175 ++++++++++++++++++ .../extension/openai_v2v_python/extension.py | 27 ++- 2 files changed, 201 insertions(+), 1 deletion(-) create mode 100644 agents/ten_packages/extension/openai_v2v_python/client.py diff --git a/agents/ten_packages/extension/openai_v2v_python/client.py b/agents/ten_packages/extension/openai_v2v_python/client.py new file mode 100644 index 00000000..df6ebcd4 --- /dev/null +++ b/agents/ten_packages/extension/openai_v2v_python/client.py @@ -0,0 +1,175 @@ +import asyncio +import base64 +import json +import os +from typing import Any, AsyncGenerator + +import uuid +import aiohttp +from . import messages + +from .log import logger + +DEFAULT_MODEL = "gpt-4o-realtime-preview" + +DEFAULT_INSTRUCTION = ''' +You are a helpful voice assistant, user can see transcription through the chat box. Your name is TEN Agent. You are built by TEN Framework which is a powerful realtime multimodal agent framework. User's input will mainly be {language}, and your response must be {language}. +{tools} +''' + +def smart_str(s: str, max_field_len: int = 128) -> str: + """parse string as json, truncate data field to 128 characters, reserialize""" + try: + data = json.loads(s) + if "delta" in data: + key = "delta" + elif "audio" in data: + key = "audio" + else: + return s + + if len(data[key]) > max_field_len: + data[key] = data[key][:max_field_len] + "..." + return json.dumps(data) + except json.JSONDecodeError: + return s + + +def generate_client_event_id() -> str: + return str(uuid.uuid4()) + +class RealtimeApiConfig: + def __init__( + self, + base_uri: str = "wss://api.openai.com", + api_key: str | None = None, + path: str = "/v1/realtime", + verbose: bool = False, + model: str=DEFAULT_MODEL, + language: str = "en-US", + system_message: str=DEFAULT_INSTRUCTION, + temperature: float =0.5, + max_tokens: int =1024, + voice: messages.Voices = messages.Voices.Alloy, + server_vad:bool=True, + ): + self.base_uri = base_uri + self.api_key = api_key + self.path = path + self.verbose = verbose + self.model = model + self.language = language + self.system_message = system_message + self.temperature = temperature + self.max_tokens = max_tokens + self.voice = voice + self.server_vad = server_vad + + def build_ctx(self) -> dict: + return { + "language": self.language + } + +class RealtimeApiClient: + def __init__( + self, + base_uri: str, + api_key: str | None = None, + path: str = "/v1/realtime", + model: str = DEFAULT_MODEL, + verbose: bool = False, + session: aiohttp.ClientSession | None = None, + ): + is_local = ( + base_uri.startswith("localhost") + or base_uri.startswith("127.0.0.1") + or base_uri.startswith("0.0.0.0") + ) + has_scheme = base_uri.startswith("ws://") or base_uri.startswith("wss://") + self.url = f"{base_uri}{path}" + if model: + self.url += f"?model={model}" + if verbose: + logger.info(f"URL: {self.url} {is_local=} {has_scheme=}") + + if not has_scheme: + if is_local: + self.url = f"ws://{self.url}" + else: + self.url = f"wss://{self.url}" + + self.api_key = api_key or os.environ.get("OPENAI_API_KEY") + self.websocket: aiohttp.ClientWebSocketResponse | None = None + self.verbose = verbose + self.session = session or aiohttp.ClientSession() + + async def __aenter__(self) -> "RealtimeApiClient": + await self.connect() + return self + + async def __aexit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> bool: + await self.shutdown() + return False + + async def connect(self): + auth = aiohttp.BasicAuth("", self.api_key) if self.api_key else None + + headers = {"OpenAI-Beta": "realtime=v1"} + if "PROD_COMPLETIONS_API_KEY" in os.environ: + headers["X-Prod-Completions-Api-Key"] = os.environ["PROD_COMPLETIONS_API_KEY"] + elif "OPENAI_API_KEY" in os.environ: + headers["X-Prod-Completions-Api-Key"] = os.environ["OPENAI_API_KEY"] + if "PROD_COMPLETIONS_ORG_ID" in os.environ: + headers["X-Prod-Completions-Org-Id"] = os.environ["PROD_COMPLETIONS_ORG_ID"] + if headers: + logger.debug("Using X-Prod-Completions-* headers for api credentials") + + self.websocket = await self.session.ws_connect( + url=self.url, + auth=auth, + headers=headers, + ) + + async def send_audio_data(self, audio_data: bytes): + """audio_data is assumed to be pcm16 24kHz mono little-endian""" + base64_audio_data = base64.b64encode(audio_data).decode("utf-8") + message = messages.InputAudioBufferAppend(audio=base64_audio_data) + await self.send_message(message) + + async def send_message(self, message: messages.ClientToServerMessage): + assert self.websocket is not None + if message.event_id is None: + message.event_id = generate_client_event_id() + message_str = message.model_dump_json() + if self.verbose: + logger.info(f"-> {smart_str(message_str)}") + await self.websocket.send_str(message_str) + + async def listen(self) -> AsyncGenerator[messages.RealtimeMessage, None]: + assert self.websocket is not None + if self.verbose: + logger.info("Listening for realtimeapi messages") + try: + async for msg in self.websocket: + if msg.type == aiohttp.WSMsgType.TEXT: + if self.verbose: + logger.info(f"<- {smart_str(msg.data)}") + yield self.handle_server_message(msg.data) + elif msg.type == aiohttp.WSMsgType.ERROR: + logger.error("Error during receive: %s", self.websocket.exception()) + break + except asyncio.CancelledError: + logger.info("Receive messages task cancelled") + + def handle_server_message(self, message: str) -> messages.ServerToClientMessage: + try: + return messages.parse_server_message(message) + except Exception as e: + logger.error("Error handling message: " + str(e)) + #raise e + + async def shutdown(self): + # Close the websocket connection if it exists + if self.websocket: + await self.websocket.close() + self.websocket = None diff --git a/agents/ten_packages/extension/openai_v2v_python/extension.py b/agents/ten_packages/extension/openai_v2v_python/extension.py index 10bb46bc..0be2d57d 100644 --- a/agents/ten_packages/extension/openai_v2v_python/extension.py +++ b/agents/ten_packages/extension/openai_v2v_python/extension.py @@ -29,6 +29,7 @@ from .conf import RealtimeApiConfig, BASIC_PROMPT from .realtime.connection import RealtimeApiConnection from .realtime.struct import * +from .tools import ToolRegistry # properties PROPERTY_API_KEY = "api_key" # Required @@ -76,7 +77,7 @@ def __init__(self, name: str): self.ctx: dict = {} # audo related - self.sample_rate: int = 24000 + self.sample_rate: int = 16000 self.out_audio_buff: bytearray = b'' self.audio_len_threshold: int = 10240 self.transcript: str = '' @@ -527,3 +528,27 @@ def _greeting_text(self) -> str: text = "안녕하세요" return text + def _register_local_tools(self) -> None: + self.registry.register( + name="weather", description="This is a weather check func, if the user is asking about the weather. you need to summarize location and time information from the context as parameters. if the information is lack, please ask for more detail before calling.", + callback=self.weather_check, + parameters={ + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The location or region for the weather check.", + }, + "datetime": { + "type": "string", + "description": "The date and time for the weather check. The datetime should use format like 2024-10-01T16:42:00.", + } + }, + "required": ["location"], + }) + self.ctx["tools"] = self.registry.to_prompt() + + # Tools + def weather_check(self, location:str = "", datetime:str = ""): + logger.info(f"on weather check {location}, {datetime}") + From 1266b381df65c6554fc3b23b7a22f553a5c6d3d1 Mon Sep 17 00:00:00 2001 From: TomasLiu Date: Sun, 6 Oct 2024 12:21:08 +0800 Subject: [PATCH 5/8] fix python exception --- agents/ten_packages/extension/openai_v2v_python/extension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agents/ten_packages/extension/openai_v2v_python/extension.py b/agents/ten_packages/extension/openai_v2v_python/extension.py index 0be2d57d..bb1c7a43 100644 --- a/agents/ten_packages/extension/openai_v2v_python/extension.py +++ b/agents/ten_packages/extension/openai_v2v_python/extension.py @@ -77,7 +77,7 @@ def __init__(self, name: str): self.ctx: dict = {} # audo related - self.sample_rate: int = 16000 + self.sample_rate: int = 24000 self.out_audio_buff: bytearray = b'' self.audio_len_threshold: int = 10240 self.transcript: str = '' From f4860370ee5b95bc965ad9e6de56e85d704992e4 Mon Sep 17 00:00:00 2001 From: TomasLiu Date: Sun, 6 Oct 2024 13:46:42 +0800 Subject: [PATCH 6/8] fix --- .../extension/openai_v2v_python/extension.py | 27 +------------------ 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/agents/ten_packages/extension/openai_v2v_python/extension.py b/agents/ten_packages/extension/openai_v2v_python/extension.py index bb1c7a43..1457498a 100644 --- a/agents/ten_packages/extension/openai_v2v_python/extension.py +++ b/agents/ten_packages/extension/openai_v2v_python/extension.py @@ -526,29 +526,4 @@ def _greeting_text(self) -> str: text = "こんにちは" elif self.config.language == "ko-KR": text = "안녕하세요" - return text - - def _register_local_tools(self) -> None: - self.registry.register( - name="weather", description="This is a weather check func, if the user is asking about the weather. you need to summarize location and time information from the context as parameters. if the information is lack, please ask for more detail before calling.", - callback=self.weather_check, - parameters={ - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The location or region for the weather check.", - }, - "datetime": { - "type": "string", - "description": "The date and time for the weather check. The datetime should use format like 2024-10-01T16:42:00.", - } - }, - "required": ["location"], - }) - self.ctx["tools"] = self.registry.to_prompt() - - # Tools - def weather_check(self, location:str = "", datetime:str = ""): - logger.info(f"on weather check {location}, {datetime}") - + return text \ No newline at end of file From 1c20910e39bfdf42ff929f10a10e5c41540382df Mon Sep 17 00:00:00 2001 From: TomasLiu Date: Sun, 6 Oct 2024 15:44:27 +0800 Subject: [PATCH 7/8] remove out of date file --- .../extension/openai_v2v_python/client.py | 175 ------------------ .../extension/openai_v2v_python/id.py | 20 -- 2 files changed, 195 deletions(-) delete mode 100644 agents/ten_packages/extension/openai_v2v_python/client.py delete mode 100644 agents/ten_packages/extension/openai_v2v_python/id.py diff --git a/agents/ten_packages/extension/openai_v2v_python/client.py b/agents/ten_packages/extension/openai_v2v_python/client.py deleted file mode 100644 index df6ebcd4..00000000 --- a/agents/ten_packages/extension/openai_v2v_python/client.py +++ /dev/null @@ -1,175 +0,0 @@ -import asyncio -import base64 -import json -import os -from typing import Any, AsyncGenerator - -import uuid -import aiohttp -from . import messages - -from .log import logger - -DEFAULT_MODEL = "gpt-4o-realtime-preview" - -DEFAULT_INSTRUCTION = ''' -You are a helpful voice assistant, user can see transcription through the chat box. Your name is TEN Agent. You are built by TEN Framework which is a powerful realtime multimodal agent framework. User's input will mainly be {language}, and your response must be {language}. -{tools} -''' - -def smart_str(s: str, max_field_len: int = 128) -> str: - """parse string as json, truncate data field to 128 characters, reserialize""" - try: - data = json.loads(s) - if "delta" in data: - key = "delta" - elif "audio" in data: - key = "audio" - else: - return s - - if len(data[key]) > max_field_len: - data[key] = data[key][:max_field_len] + "..." - return json.dumps(data) - except json.JSONDecodeError: - return s - - -def generate_client_event_id() -> str: - return str(uuid.uuid4()) - -class RealtimeApiConfig: - def __init__( - self, - base_uri: str = "wss://api.openai.com", - api_key: str | None = None, - path: str = "/v1/realtime", - verbose: bool = False, - model: str=DEFAULT_MODEL, - language: str = "en-US", - system_message: str=DEFAULT_INSTRUCTION, - temperature: float =0.5, - max_tokens: int =1024, - voice: messages.Voices = messages.Voices.Alloy, - server_vad:bool=True, - ): - self.base_uri = base_uri - self.api_key = api_key - self.path = path - self.verbose = verbose - self.model = model - self.language = language - self.system_message = system_message - self.temperature = temperature - self.max_tokens = max_tokens - self.voice = voice - self.server_vad = server_vad - - def build_ctx(self) -> dict: - return { - "language": self.language - } - -class RealtimeApiClient: - def __init__( - self, - base_uri: str, - api_key: str | None = None, - path: str = "/v1/realtime", - model: str = DEFAULT_MODEL, - verbose: bool = False, - session: aiohttp.ClientSession | None = None, - ): - is_local = ( - base_uri.startswith("localhost") - or base_uri.startswith("127.0.0.1") - or base_uri.startswith("0.0.0.0") - ) - has_scheme = base_uri.startswith("ws://") or base_uri.startswith("wss://") - self.url = f"{base_uri}{path}" - if model: - self.url += f"?model={model}" - if verbose: - logger.info(f"URL: {self.url} {is_local=} {has_scheme=}") - - if not has_scheme: - if is_local: - self.url = f"ws://{self.url}" - else: - self.url = f"wss://{self.url}" - - self.api_key = api_key or os.environ.get("OPENAI_API_KEY") - self.websocket: aiohttp.ClientWebSocketResponse | None = None - self.verbose = verbose - self.session = session or aiohttp.ClientSession() - - async def __aenter__(self) -> "RealtimeApiClient": - await self.connect() - return self - - async def __aexit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> bool: - await self.shutdown() - return False - - async def connect(self): - auth = aiohttp.BasicAuth("", self.api_key) if self.api_key else None - - headers = {"OpenAI-Beta": "realtime=v1"} - if "PROD_COMPLETIONS_API_KEY" in os.environ: - headers["X-Prod-Completions-Api-Key"] = os.environ["PROD_COMPLETIONS_API_KEY"] - elif "OPENAI_API_KEY" in os.environ: - headers["X-Prod-Completions-Api-Key"] = os.environ["OPENAI_API_KEY"] - if "PROD_COMPLETIONS_ORG_ID" in os.environ: - headers["X-Prod-Completions-Org-Id"] = os.environ["PROD_COMPLETIONS_ORG_ID"] - if headers: - logger.debug("Using X-Prod-Completions-* headers for api credentials") - - self.websocket = await self.session.ws_connect( - url=self.url, - auth=auth, - headers=headers, - ) - - async def send_audio_data(self, audio_data: bytes): - """audio_data is assumed to be pcm16 24kHz mono little-endian""" - base64_audio_data = base64.b64encode(audio_data).decode("utf-8") - message = messages.InputAudioBufferAppend(audio=base64_audio_data) - await self.send_message(message) - - async def send_message(self, message: messages.ClientToServerMessage): - assert self.websocket is not None - if message.event_id is None: - message.event_id = generate_client_event_id() - message_str = message.model_dump_json() - if self.verbose: - logger.info(f"-> {smart_str(message_str)}") - await self.websocket.send_str(message_str) - - async def listen(self) -> AsyncGenerator[messages.RealtimeMessage, None]: - assert self.websocket is not None - if self.verbose: - logger.info("Listening for realtimeapi messages") - try: - async for msg in self.websocket: - if msg.type == aiohttp.WSMsgType.TEXT: - if self.verbose: - logger.info(f"<- {smart_str(msg.data)}") - yield self.handle_server_message(msg.data) - elif msg.type == aiohttp.WSMsgType.ERROR: - logger.error("Error during receive: %s", self.websocket.exception()) - break - except asyncio.CancelledError: - logger.info("Receive messages task cancelled") - - def handle_server_message(self, message: str) -> messages.ServerToClientMessage: - try: - return messages.parse_server_message(message) - except Exception as e: - logger.error("Error handling message: " + str(e)) - #raise e - - async def shutdown(self): - # Close the websocket connection if it exists - if self.websocket: - await self.websocket.close() - self.websocket = None diff --git a/agents/ten_packages/extension/openai_v2v_python/id.py b/agents/ten_packages/extension/openai_v2v_python/id.py deleted file mode 100644 index e8fe56c8..00000000 --- a/agents/ten_packages/extension/openai_v2v_python/id.py +++ /dev/null @@ -1,20 +0,0 @@ -import random -import string - - -def generate_rand_str(prefix: str, len: int = 16) -> str: - # Generate a random string of specified length with the given prefix - random_str = "".join(random.choices(string.ascii_letters + string.digits, k=len)) - return f"{prefix}_{random_str}" - - -def generate_client_event_id() -> str: - return generate_rand_str("cevt") - - -def generate_event_id() -> str: - return generate_rand_str("event") - - -def generate_response_id() -> str: - return generate_rand_str("resp") From 59ee5056bb923ff952d3d409292504856e93c3cf Mon Sep 17 00:00:00 2001 From: TomasLiu Date: Sun, 6 Oct 2024 16:59:29 +0800 Subject: [PATCH 8/8] remove standalone graph --- agents/property.json | 147 ------------------------------------------- 1 file changed, 147 deletions(-) diff --git a/agents/property.json b/agents/property.json index 993b717a..68888fb1 100644 --- a/agents/property.json +++ b/agents/property.json @@ -2305,153 +2305,6 @@ ] } ] - }, - { - "name": "va.openai.v2v.tools", - "auto_start": false, - "nodes": [ - { - "type": "extension", - "extension_group": "rtc", - "addon": "agora_rtc", - "name": "agora_rtc", - "property": { - "app_id": "${env:AGORA_APP_ID}", - "token": "", - "channel": "astra_agents_test", - "stream_id": 1234, - "remote_stream_id": 123, - "subscribe_audio": true, - "publish_audio": true, - "publish_data": true, - "subscribe_audio_sample_rate": 24000 - } - }, - { - "type": "extension", - "extension_group": "llm", - "addon": "openai_v2v_python", - "name": "openai_v2v_python", - "property": { - "api_key": "${env:OPENAI_REALTIME_API_KEY}", - "temperature": 0.9, - "model": "gpt-4o-realtime-preview", - "max_tokens": 2048, - "voice": "alloy", - "language": "en-US", - "server_vad": true, - "dump": true - } - }, - { - "type": "extension", - "extension_group": "transcriber", - "addon": "message_collector", - "name": "message_collector" - }, - { - "type": "extension", - "extension_group": "tools", - "addon": "weatherapi_tool_python", - "name": "weatherapi_tool_python", - "property": { - "api_key": "${env:WEATHERAPI_API_KEY}" - } - } - ], - "connections": [ - { - "extension_group": "rtc", - "extension": "agora_rtc", - "audio_frame": [ - { - "name": "pcm_frame", - "dest": [ - { - "extension_group": "llm", - "extension": "openai_v2v_python" - } - ] - } - ] - }, - { - "extension_group": "tools", - "extension": "weatherapi_tool_python", - "cmd": [ - { - "name": "tool_register", - "dest": [ - { - "extension_group": "llm", - "extension": "openai_v2v_python" - } - ] - } - ] - }, - { - "extension_group": "llm", - "extension": "openai_v2v_python", - "audio_frame": [ - { - "name": "pcm_frame", - "dest": [ - { - "extension_group": "rtc", - "extension": "agora_rtc" - } - ] - } - ], - "data": [ - { - "name": "text_data", - "dest": [ - { - "extension_group": "transcriber", - "extension": "message_collector" - } - ] - } - ], - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "rtc", - "extension": "agora_rtc" - } - ] - }, - { - "name": "tool_call", - "dest": [ - { - "extension_group": "tools", - "extension": "weatherapi_tool_python" - } - ] - } - ] - }, - { - "extension_group": "transcriber", - "extension": "message_collector", - "data": [ - { - "name": "data", - "dest": [ - { - "extension_group": "rtc", - "extension": "agora_rtc" - } - ] - } - ] - } - ] } ] }