From cd661c7d0bf254470f2197f91c1430a42f92ca69 Mon Sep 17 00:00:00 2001 From: qinhui <> Date: Fri, 13 Dec 2024 20:42:21 +0800 Subject: [PATCH] Add computer tool extension --- agents/examples/experimental/property.json | 353 ++++++++++++++++ .../extension/computer_tool_python/BUILD.gn | 19 + .../extension/computer_tool_python/README.md | 22 + .../computer_tool_python/__init__.py | 6 + .../extension/computer_tool_python/addon.py | 19 + .../computer_tool_python/extension.py | 388 ++++++++++++++++++ .../computer_tool_python/manifest.json | 122 ++++++ .../extension/computer_tool_python/openai.py | 137 +++++++ .../computer_tool_python/property.json | 1 + .../computer_tool_python/tests/test_basic.py | 36 ++ 10 files changed, 1103 insertions(+) create mode 100644 agents/ten_packages/extension/computer_tool_python/BUILD.gn create mode 100644 agents/ten_packages/extension/computer_tool_python/README.md create mode 100644 agents/ten_packages/extension/computer_tool_python/__init__.py create mode 100644 agents/ten_packages/extension/computer_tool_python/addon.py create mode 100644 agents/ten_packages/extension/computer_tool_python/extension.py create mode 100644 agents/ten_packages/extension/computer_tool_python/manifest.json create mode 100644 agents/ten_packages/extension/computer_tool_python/openai.py create mode 100644 agents/ten_packages/extension/computer_tool_python/property.json create mode 100644 agents/ten_packages/extension/computer_tool_python/tests/test_basic.py diff --git a/agents/examples/experimental/property.json b/agents/examples/experimental/property.json index eca28972..0d46e3e4 100644 --- a/agents/examples/experimental/property.json +++ b/agents/examples/experimental/property.json @@ -2,6 +2,359 @@ "_ten": { "log_level": 3, "predefined_graphs": [ + { + "name": "computer_tool_openai_azure", + "auto_start": false, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": 
"${env:AGORA_APP_ID}", + "token": "", + "channel": "ten_agent_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "subscribe_video": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "${env:AZURE_STT_KEY}", + "agora_asr_vendor_region": "${env:AZURE_STT_REGION}", + "agora_asr_session_control_file_path": "session_control.conf", + "subscribe_video_pix_fmt": 4 + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector", + "name": "interrupt_detector" + }, + { + "type": "extension", + "extension_group": "chatgpt", + "addon": "openai_chatgpt_python", + "name": "openai_chatgpt", + "property": { + "base_url": "${env:OPENAI_API_BASE}", + "api_key": "${env:OPENAI_API_KEY}", + "frequency_penalty": 0.9, + "model": "gpt-4o", + "max_tokens": 512, + "prompt": "", + "proxy_url": "${env:OPENAI_PROXY_URL}", + "greeting": "TEN Agent connected. How can I help you today?", + "checking_vision_text_items": "[\"Let me take a look...\",\"Let me check your camera...\",\"Please wait for a second...\"]", + "max_memory_length": 10, + "enable_tools": true + } + }, + { + "type": "extension", + "extension_group": "chatgpt", + "addon": "vision_tool_python", + "name": "vision_tool" + }, + { + "type": "extension", + "extension_group": "chatgpt", + "addon": "computer_tool_python", + "name": "computer_tool_python", + "property": { + "base_url": "${env:OPENAI_API_BASE}", + "api_key": "${env:OPENAI_API_KEY}", + "frequency_penalty": 0.9, + "model": "gpt-4o", + "max_tokens": 512, + "prompt": "", + "proxy_url": "${env:OPENAI_PROXY_URL}", + "greeting": "TEN Agent connected. 
How can I help you today?", + "checking_vision_text_items": "[\"Let me take a look...\",\"Let me check your camera...\",\"Please wait for a second...\"]", + "max_memory_length": 10, + "enable_tools": true + } + }, + { + "type": "extension", + "extension_group": "chatgpt", + "addon": "weatherapi_tool_python", + "name": "weatherapi_tool_python", + "property": { + "api_key": "${env:WEATHERAPI_API_KEY}" + } + }, + { + "type": "extension", + "extension_group": "chatgpt", + "addon": "bingsearch_tool_python", + "name": "bingsearch_tool_python", + "property": { + "api_key": "${env:BING_API_KEY}" + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "azure_tts", + "name": "azure_tts", + "property": { + "azure_subscription_key": "${env:AZURE_TTS_KEY}", + "azure_subscription_region": "${env:AZURE_TTS_REGION}", + "azure_synthesis_voice_name": "en-US-AndrewMultilingualNeural" + } + }, + { + "type": "extension", + "extension_group": "transcriber", + "addon": "message_collector", + "name": "message_collector" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + }, + { + "extension_group": "transcriber", + "extension": "message_collector" + } + ] + } + ], + "video_frame": [ + { + "name": "video_frame", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "vision_tool" + }, + { + "extension_group": "chatgpt", + "extension": "computer_tool_python" + } + ] + } + ], + "cmd": [ + { + "name": "on_user_joined", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + }, + { + "name": "on_user_left", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt", + "data": [ + { + "name": 
"text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + }, + { + "extension_group": "transcriber", + "extension": "message_collector" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + } + ] + }, + { + "name": "tool_call", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "vision_tool" + }, + { + "extension_group": "chatgpt", + "extension": "computer_tool_python" + }, + { + "extension_group": "chatgpt", + "extension": "weatherapi_tool_python" + }, + { + "extension_group": "chatgpt", + "extension": "bingsearch_tool_python" + } + ] + } + ] + }, + { + "extension_group": "chatgpt", + "extension": "computer_tool_python", + "data": [ + { + "name": "data", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "tool_register", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + }, + { + "extension_group": "chatgpt", + "extension": "vision_tool", + "cmd": [ + { + "name": "tool_register", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + }, + { + "extension_group": "chatgpt", + "extension": "weatherapi_tool_python", + "cmd": [ + { + "name": "tool_register", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + }, + { + "extension_group": "chatgpt", + "extension": "bingsearch_tool_python", + "cmd": [ + { + "name": "tool_register", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "azure_tts", + "audio_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + 
"extension_group": "transcriber", + "extension": "message_collector", + "data": [ + { + "name": "data", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + } + ] + }, { "name": "va_openai_azure_fashionai", "auto_start": false, diff --git a/agents/ten_packages/extension/computer_tool_python/BUILD.gn b/agents/ten_packages/extension/computer_tool_python/BUILD.gn new file mode 100644 index 00000000..8c84bd41 --- /dev/null +++ b/agents/ten_packages/extension/computer_tool_python/BUILD.gn @@ -0,0 +1,19 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +import("//build/feature/ten_package.gni") + +ten_package("computer_tool_python") { + package_kind = "extension" + + resources = [ + "__init__.py", + "addon.py", + "extension.py", + "manifest.json", + "property.json", + "tests", + ] +} diff --git a/agents/ten_packages/extension/computer_tool_python/README.md b/agents/ten_packages/extension/computer_tool_python/README.md new file mode 100644 index 00000000..f82873b8 --- /dev/null +++ b/agents/ten_packages/extension/computer_tool_python/README.md @@ -0,0 +1,22 @@ +# computer_tool_python + +This is the tool demo for computer use. + +## Features + +- Open the Application +- Analyze the code through screen sharing +- Generate code +- Save the content to the Note book + +## API + +Refer to `api` definition in [manifest.json] and default values in [property.json](property.json). 
#
# This file is part of TEN Framework, an open source project.
# Licensed under the Apache License, Version 2.0.
# See the LICENSE file for more information.
#
from ten import (
    Addon,
    register_addon_as_extension,
    TenEnv,
)
from .extension import ComputerToolExtension


@register_addon_as_extension("computer_tool_python")
class ComputerToolExtensionAddon(Addon):
    """Addon registered under the name ``computer_tool_python``.

    Its only job is to hand a fresh ``ComputerToolExtension`` back to the
    runtime whenever an instance is requested.
    """

    def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None:
        """Build the extension named *name* and report it via *ten_env*.

        :param ten_env: environment handle used for logging and for
            signalling that the instance is ready
        :param name: instance name, forwarded to the extension constructor
        :param context: opaque value passed back unchanged to
            ``on_create_instance_done``
        """
        ten_env.log_info("ComputerToolExtensionAddon on_create_instance")
        extension = ComputerToolExtension(name)
        ten_env.on_create_instance_done(extension, context)
#
import asyncio
import json
from base64 import b64encode
from io import BytesIO
from typing import Any, Dict
import threading
from ten_ai_base.llm_tool import AsyncLLMToolBaseExtension
from ten_ai_base.types import LLMToolMetadata, LLMToolMetadataParameter, LLMToolResult, LLMDataCompletionArgs
from .openai import OpenAIChatGPT, OpenAIChatGPTConfig

from PIL import Image
from ten_ai_base.helper import AsyncEventEmitter

import traceback
from ten import (
    AsyncTenEnv,
    AudioFrame,
    VideoFrame,
    Cmd,
    Data
)

START_APP_TOOL_NAME = "start_app"
START_APP_TOOL_DESCRIPTION = "start an application with name"

SAVE_TO_NOTEBOOK_TOOL_NAME = "save_to_notebook"
SAVE_TO_NOTEBOOK_TOOL_DESCRIPTION = "call this whenever you need to save anything to notebook"

GENERATE_SOURCE_CODE_TOOL_NAME = "generate_source_code"
GENERATE_SOURCE_CODE_TOOL_DESCRIPTION = "Use this tool whenever the user requests to generate or write source code. Examples include: 'Can you generate a function to get the current time?', 'Generate a shopping cart list using Swift.', 'Write a Python script to fetch weather data.', 'Create a Java class for a user profile.'"

GET_SOURCE_CODE_TOOL_NAME = "get_source_code"
GET_SOURCE_CODE_TOOL_DESCRIPTION = "call this whenever you need to get source code for evaluation or refactoring, for example when user asks 'Can you help me with the code?', 'Can you help take a look at my code?', 'Take a look at my source code', You can use screenshare or not to get code"

# Pause between two outgoing chunks; kept from the original implementation,
# which slept 0.1s after every message it forwarded.
_SEND_INTERVAL_SECONDS = 0.1


def rgb2base64jpeg(rgb_data, width, height):
    """Encode a raw RGBA frame as a base64 PNG data URL.

    The name is historical: the payload has always been PNG, not JPEG.

    :param rgb_data: raw pixel buffer, interpreted as RGBA
    :param width: frame width in pixels
    :param height: frame height in pixels
    :return: ``data:image/png;base64,...`` string
    """
    pil_image = Image.frombytes("RGBA", (width, height), bytes(rgb_data))
    pil_image = pil_image.convert("RGB")

    # Cap the larger side at 1080 px while keeping the aspect ratio.
    pil_image = resize_image_keep_aspect(pil_image, 1080)

    # Encode in memory only. The original also dumped every frame to
    # "test.png" in the working directory, a debugging leftover.
    buffered = BytesIO()
    pil_image.save(buffered, format="png")

    base64_encoded_image = b64encode(buffered.getvalue()).decode("utf-8")
    mime_type = "image/png"
    return f"data:{mime_type};base64,{base64_encoded_image}"


def resize_image_keep_aspect(image, max_size=512):
    """
    Resize an image while maintaining its aspect ratio, ensuring the larger dimension is max_size.
    If both dimensions are smaller than max_size, the image is not resized.

    :param image: A PIL Image object
    :param max_size: The maximum size for the larger dimension (width or height)
    :return: A PIL Image object (resized or original)
    """
    width, height = image.size

    # Nothing to do when the image already fits.
    if width <= max_size and height <= max_size:
        return image

    aspect_ratio = width / height

    # Truncation can give 0 for extreme aspect ratios, so clamp the
    # shorter side to at least one pixel.
    if width > height:
        new_width = max_size
        new_height = max(1, int(max_size / aspect_ratio))
    else:
        new_height = max_size
        new_width = max(1, int(max_size * aspect_ratio))

    return image.resize((new_width, new_height))


class ComputerToolExtension(AsyncLLMToolBaseExtension):
    """LLM tool extension exposing "computer use" actions.

    It registers four tools (start an app, save to notebook, generate source
    code, get/evaluate source code). Coding requests are answered by a
    streamed chat completion whose chunks are queued and forwarded as
    ``data`` messages.
    """

    def __init__(self, name: str) -> None:
        super().__init__(name)
        self.openai_chatgpt = None
        self.config = None
        # Re-created in on_start so it belongs to the loop that uses it.
        self.coding_text_queue = asyncio.Queue()
        self.loop = None
        self.memory = []
        self.max_memory_length = 10
        self.image_data = None
        self.image_width = 0
        self.image_height = 0
        self.coding_text = ""
        # Background consumer of coding_text_queue (see on_start / on_stop).
        self._queue_task = None
        # Strong references to in-flight completion tasks; asyncio itself only
        # keeps weak references to tasks.
        self._completion_tasks = set()
        self.apps_list = {
            "WeChat": "com.tencent.xinWeChat",
            "Xcode": "com.apple.dt.Xcode",
            "Safari": "com.apple.Safari",
            "Word": "com.microsoft.Word",
            "Excel": "com.microsoft.Excel",
            "PowerPoint": "com.microsoft.PowerPoint",
            "Pages": "com.apple.Pages",
            "Numbers": "com.apple.Numbers",
            "Keynote": "com.apple.Keynote",
            "AppleMusic": "com.apple.Music",
            "Photos": "com.apple.Photos",
            "Mail": "com.apple.mail",
            "Messages": "com.apple.Messages",
            "Calendar": "com.apple.Calendar",
            "Notes": "com.apple.Notes",
        }

    async def on_init(self, ten_env: AsyncTenEnv) -> None:
        ten_env.log_debug("on_init")
        await super().on_init(ten_env)

    async def on_start(self, ten_env: AsyncTenEnv) -> None:
        """Start the queue consumer and build the OpenAI client.

        The consumer runs as a task on the current loop. The original used a
        second event loop on a never-stopped thread; asyncio.Queue is not
        thread-safe and that thread kept the process alive.
        """
        ten_env.log_debug("on_start")
        self.loop = asyncio.get_running_loop()
        self.coding_text_queue = asyncio.Queue()
        self._queue_task = self.loop.create_task(self.process_coding_text_queue(ten_env))
        await super().on_start(ten_env)

        # Prepare configuration
        self.config = OpenAIChatGPTConfig.create(ten_env=ten_env)

        # Mandatory properties
        if not self.config.api_key:
            ten_env.log_info("API key is missing, exiting on_start")
            return

        # Honour the configured history size instead of the hard-coded default.
        if self.config.max_memory_length:
            self.max_memory_length = self.config.max_memory_length

        self.openai_chatgpt = OpenAIChatGPT(ten_env, self.config)

    async def on_stop(self, ten_env: AsyncTenEnv) -> None:
        """Cancel the background tasks started by this extension."""
        ten_env.log_debug("on_stop")
        pending = [
            task
            for task in (self._queue_task, *list(self._completion_tasks))
            if task is not None and not task.done()
        ]
        for task in pending:
            task.cancel()
        if pending:
            await asyncio.gather(*pending, return_exceptions=True)
        self._queue_task = None
        await super().on_stop(ten_env)

    async def on_deinit(self, ten_env: AsyncTenEnv) -> None:
        ten_env.log_debug("on_deinit")
        await super().on_deinit(ten_env)

    async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd) -> None:
        """Handle ``coding_text`` and ``flush`` after the base-class handling."""
        cmd_name = cmd.get_name()
        ten_env.log_debug("on_cmd name {}".format(cmd_name))
        await super().on_cmd(ten_env, cmd)
        if cmd_name == "coding_text":
            self.coding_text = cmd.get_property_string("text")
        elif cmd_name == "flush":
            await self.flush_coding_text_queue()

    async def on_audio_frame(self, ten_env: AsyncTenEnv, audio_frame: AudioFrame) -> None:
        audio_frame_name = audio_frame.get_name()
        ten_env.log_debug("on_audio_frame name {}".format(audio_frame_name))

        # TODO: process audio frame

    async def on_video_frame(self, ten_env: AsyncTenEnv, video_frame: VideoFrame) -> None:
        """Remember the latest frame so a screenshare request can use it."""
        video_frame_name = video_frame.get_name()
        ten_env.log_debug("on_video_frame name {}".format(video_frame_name))

        self.image_data = video_frame.get_buf()
        self.image_width = video_frame.get_width()
        self.image_height = video_frame.get_height()

    def get_tool_metadata(self, ten_env: AsyncTenEnv) -> list[LLMToolMetadata]:
        """Describe the four tools this extension offers to the LLM."""
        return [
            LLMToolMetadata(
                name=START_APP_TOOL_NAME,
                description=START_APP_TOOL_DESCRIPTION,
                parameters=[
                    LLMToolMetadataParameter(
                        name="name",
                        type="string",
                        description="The application name to start",
                        required=True,
                    )
                ]
            ),
            LLMToolMetadata(
                name=SAVE_TO_NOTEBOOK_TOOL_NAME,
                description=SAVE_TO_NOTEBOOK_TOOL_DESCRIPTION,
                parameters=[
                    LLMToolMetadataParameter(
                        name="text",
                        type="string",
                        description="The text to save",
                        required=True,
                    )
                ]
            ),
            LLMToolMetadata(
                name=GENERATE_SOURCE_CODE_TOOL_NAME,
                description=GENERATE_SOURCE_CODE_TOOL_DESCRIPTION,
                parameters=[
                    LLMToolMetadataParameter(
                        name="name",
                        type="string",
                        description="What code do you want to generate?",
                        required=True,
                    )
                ]
            ),
            LLMToolMetadata(
                name=GET_SOURCE_CODE_TOOL_NAME,
                description=GET_SOURCE_CODE_TOOL_DESCRIPTION,
                parameters=[
                    LLMToolMetadataParameter(
                        name="use_screenshare",
                        type="boolean",
                        description="use screenshare to get the code, if false, use the code from clipboard",
                        required=True,
                    ),
                    LLMToolMetadataParameter(
                        name="name",
                        type="string",
                        description="What do you want to do with the code?",
                        required=True,
                    )
                ]
            )
        ]

    async def run_tool(self, ten_env: AsyncTenEnv, name: str, args: dict) -> LLMToolResult:
        """Dispatch a tool call by *name*.

        :param ten_env: environment handle
        :param name: one of the tool names declared in get_tool_metadata
        :param args: tool arguments as supplied by the LLM
        :return: dict with a JSON-encoded ``content`` string
        """
        if name == START_APP_TOOL_NAME:
            app_name = args.get("name")
            if app_name not in self.apps_list:
                return {"content": json.dumps({"text": f"app {app_name} not found"})}
            result = await self._start_application(args, ten_env)
            return {"content": json.dumps(result)}

        if name == SAVE_TO_NOTEBOOK_TOOL_NAME:
            result = await self._save_to_notebook(args, ten_env)
            return {"content": json.dumps(result)}

        if name == GET_SOURCE_CODE_TOOL_NAME:
            request_text = args.get("name")
            if args.get("use_screenshare"):
                if self.image_data is None:
                    return {"content": json.dumps({"text": "just say 'screenshare is not visible to me'"})}
                base64_image = rgb2base64jpeg(self.image_data, self.image_width, self.image_height)
                source_part = {"type": "image_url", "image_url": {"url": base64_image}}
            else:
                # coding_text starts as "", so test for emptiness; the original
                # "is not None" check could never fail.
                if not self.coding_text:
                    return {"content": json.dumps({"text": "just say 'i can't find code in your clipboard'"})}
                source_part = {"type": "text", "text": self.coding_text}
            return self._request_coding_completion(
                ten_env, [source_part, {"type": "text", "text": request_text}]
            )

        if name == GENERATE_SOURCE_CODE_TOOL_NAME:
            return self._request_coding_completion(
                ten_env, [{"type": "text", "text": args.get("name")}]
            )

        # The original fell through and returned None here.
        ten_env.log_warn(f"run_tool called with unknown tool {name}")
        return {"content": json.dumps({"text": f"tool {name} not found"})}

    def _request_coding_completion(self, ten_env: AsyncTenEnv, content: list) -> LLMToolResult:
        """Start a background coding completion and return the tool reply."""
        if self.openai_chatgpt is None:
            # on_start bailed out because no API key was configured.
            ten_env.log_warn("coding completion requested but the OpenAI client is not configured")
            return {"content": json.dumps({"text": "just say 'the coding assistant is not configured'"})}

        task = asyncio.create_task(
            self.coding_completion(ten_env, ["coding", content, self.memory], "generate_source")
        )
        # Hold a reference until the task finishes so it cannot be collected.
        self._completion_tasks.add(task)
        task.add_done_callback(self._completion_tasks.discard)
        return {"content": json.dumps({"text": "just say 'the coding assistance has been provided'"})}

    async def _start_application(self, args: dict, ten_env: AsyncTenEnv) -> Any:
        """Emit a ``start_app`` action for the requested application."""
        app_name = args.get("name")
        app_bundle_id = self.apps_list.get(app_name)
        self._send_data(ten_env, "start_app", {"bundle_id": app_bundle_id, "app_name": app_name})
        return {"text": f"just say '{app_name} is starting'"}

    async def _save_to_notebook(self, args: dict, ten_env: AsyncTenEnv) -> Any:
        """Emit a ``save_to_notebook`` action carrying the tool arguments."""
        self._send_data(ten_env, "save_to_notebook", args)
        return {"text": "just say 'the content has been saved to notebook'"}

    def _send_data(self, ten_env: AsyncTenEnv, action: str, data: Dict[str, Any]):
        """Send an action as a UTF-8 JSON buffer; failures are only logged."""
        try:
            action_data = json.dumps({
                "data_type": "action",
                "action": action,
                "data": data
            })
            output_data = Data.create("data")
            output_data.set_property_buf("data", action_data.encode("utf-8"))
            ten_env.send_data(output_data)
        except Exception as err:
            ten_env.log_warn(f"send data error {err}")

    async def coding_completion(self, ten_env: AsyncTenEnv, data: Any, action: str) -> None:
        """Stream a chat completion and queue its chunks for delivery.

        :param ten_env: environment handle
        :param data: ``[task_type, content, memory]``; content is a list of
            message parts and memory is the prior conversation
        :param action: action name attached to every queued chunk
        """
        _task_type, content, memory = data
        # Image parts are kept out of the history and out of the logs,
        # because a base64 frame is very large.
        non_artifact_content = [item for item in content if item.get("type") != "image_url"]
        try:
            tools = None
            message = {"role": "user", "content": content}
            non_artifact_message = {"role": "user", "content": non_artifact_content}

            response_text = ""

            # Set once the final (empty) chunk has been queued.
            content_finished_event = asyncio.Event()

            async def handle_content_update(delta: str):
                nonlocal response_text
                try:
                    await self.coding_text_queue.put({"text": delta, "is_final": False, "action": action})
                except Exception:
                    ten_env.log_error(f"Error in handle_content_update: {traceback.format_exc()} for input text: {delta}")
                response_text += delta

            async def handle_content_finished(full_content: str):
                try:
                    await self.coding_text_queue.put({"text": "", "is_final": True, "action": action})
                except Exception:
                    ten_env.log_error(f"Error in handle_content_finished: {traceback.format_exc()} for input text: {full_content}")
                content_finished_event.set()

            listener = AsyncEventEmitter()
            listener.on("content_update", handle_content_update)
            listener.on("content_finished", handle_content_finished)

            # Make an async API call to get chat completions
            await self.openai_chatgpt.get_chat_completions_stream(memory + [message], tools, listener)

            # Wait for the content to be finished
            await content_finished_event.wait()

            self._append_memory(non_artifact_message)
            self._append_memory({"role": "assistant", "content": response_text})
        except asyncio.CancelledError:
            ten_env.log_info(f"Task cancelled: {non_artifact_content}")
        except Exception:
            ten_env.log_error(f"Error in chat_completion: {traceback.format_exc()} for input text: {non_artifact_content}")
        finally:
            ten_env.log_info(f"Task completed: {non_artifact_content}")

    def _append_memory(self, message: Dict[str, Any]):
        """Append *message*, evicting the oldest entries beyond the limit."""
        # ">=" keeps at most max_memory_length entries; the original ">"
        # allowed one more than the limit.
        while self.memory and len(self.memory) >= self.max_memory_length:
            self.memory.pop(0)
        self.memory.append(message)

    async def flush_coding_text_queue(self):
        """Drop every chunk that is still waiting to be sent."""
        queue = self.coding_text_queue
        while not queue.empty():
            queue.get_nowait()
            queue.task_done()

    async def process_coding_text_queue(self, ten_env: AsyncTenEnv):
        """Forward queued chunks as data messages until cancelled."""
        while True:
            queue = self.coding_text_queue
            # Blocks until an item arrives; cancellation ends the loop.
            coding_text = await queue.get()
            try:
                self._send_data(ten_env, coding_text["action"], {"text": coding_text["text"], "is_final": coding_text["is_final"]})
            except Exception:
                ten_env.log_error(f"Error in process_coding_text_queue: {traceback.format_exc()} for input text: {coding_text}")
            finally:
                # Only reached after a successful get, so the counts match.
                queue.task_done()
            await asyncio.sleep(_SEND_INTERVAL_SECONDS)
import random
import requests
from openai import AsyncOpenAI
from ten_ai_base.config import BaseConfig
from dataclasses import dataclass
from ten.async_ten_env import AsyncTenEnv

# Single copy of the default system prompt, shared by the field default and
# by default_config().
_DEFAULT_PROMPT = "You are a voice assistant who talks in a conversational way and can chat with me like my friends. I will speak to you in English or Chinese, and you will answer in the corrected and improved version of my text with the language I use. Don’t talk like a robot, instead I would like you to talk like a real human with emotions. I will use your answer for text-to-speech, so don’t return me any meaningless characters. I want you to be helpful, when I’m asking you for advice, give me precise, practical and useful advice instead of being vague. When giving me a list of options, express the options in a narrative way instead of bullet points."


def _mask_secret(secret: str) -> str:
    """Return a log-safe rendering of *secret* (at most its last 4 chars)."""
    if not secret:
        return "<empty>"
    if len(secret) <= 8:
        return "****"
    return "****" + secret[-4:]


@dataclass
class OpenAIChatGPTConfig(BaseConfig):
    """Settings for the chat-completion client used by the computer tool."""

    api_key: str = ""
    base_url: str = "https://api.openai.com/v1"
    model: str = "gpt-4o"  # Adjust this to match the equivalent of `openai.GPT4o` in the Python library
    prompt: str = _DEFAULT_PROMPT
    frequency_penalty: float = 0.9
    presence_penalty: float = 0.9
    top_p: float = 1.0
    temperature: float = 0.1
    max_tokens: int = 512
    # Evaluated once when the class is defined, so every config created in
    # this process shares the same default seed.
    seed: int = random.randint(0, 10000)
    proxy_url: str = ""
    greeting: str = "Hello, how can I help you today?"
    max_memory_length: int = 10
    # NOTE(review): vendor / azure_* are declared but not read in this file.
    vendor: str = "openai"
    azure_endpoint: str = ""
    azure_api_version: str = ""

    @classmethod
    def default_config(cls):
        """Return a config with the stock values and a fresh random seed."""
        return cls(
            base_url="https://api.openai.com/v1",
            api_key="",
            model="gpt-4o",  # Adjust this to match the equivalent of `openai.GPT4o` in the Python library
            prompt=_DEFAULT_PROMPT,
            frequency_penalty=0.9,
            presence_penalty=0.9,
            top_p=1.0,
            temperature=0.1,
            max_tokens=512,
            seed=random.randint(0, 10000),
            proxy_url=""
        )


class OpenAIChatGPT:
    """Thin wrapper around ``AsyncOpenAI`` for structured and streamed chats."""

    client = None

    def __init__(self, ten_env: AsyncTenEnv, config: OpenAIChatGPTConfig):
        self.config = config
        # Never write the raw key to the log.
        ten_env.log_info(f"apikey {_mask_secret(config.api_key)}, base_url {config.base_url}")
        self.client = AsyncOpenAI(
            api_key=config.api_key,
            base_url=config.base_url
        )
        self.session = requests.Session()
        if config.proxy_url:
            proxies = {
                "http": config.proxy_url,
                "https": config.proxy_url,
            }
            self.session.proxies.update(proxies)
        # NOTE(review): kept as in the original; AsyncOpenAI does not appear to
        # read a `session` attribute, so the proxy may have no effect. Confirm.
        self.client.session = self.session

    async def get_chat_completions_structured(self, messages, response_format):
        """Request a completion parsed into *response_format*.

        :param messages: conversation messages, appended after the system prompt
        :param response_format: schema passed through to the parse endpoint
        :return: the parsed object, or None when the reply carries neither a
            parsed value nor a refusal
        :raises Exception: on a refusal or on any client error
        """
        req = {
            "model": "gpt-4o-2024-08-06",
            "messages": [
                {
                    "role": "system",
                    "content": self.config.prompt,
                },
                *messages,
            ],
            "temperature": self.config.temperature,
            "top_p": self.config.top_p,
            "presence_penalty": self.config.presence_penalty,
            "frequency_penalty": self.config.frequency_penalty,
            "max_tokens": self.config.max_tokens,
            "seed": self.config.seed,
            "response_format": response_format,
        }

        try:
            completion = await self.client.beta.chat.completions.parse(**req)
            response = completion.choices[0].message
            if response.parsed:
                return response.parsed
            if response.refusal:
                # handle refusal
                raise Exception(f"Refusal: {response.refusal}")
        except Exception as e:
            raise Exception(f"CreateChatCompletionStructured failed, err: {e}") from e
        return None

    async def get_chat_completions_stream(self, messages, tools=None, listener=None):
        """Stream a completion, emitting events on *listener*.

        Emits ``content_update`` per non-empty text delta, ``tool_call`` per
        tool-call delta, and ``content_finished`` with the full text at the end.

        :param messages: conversation messages, appended after the system prompt
        :param tools: optional tool definitions; omitted from the request when None
        :param listener: optional emitter with an ``emit(event, payload)`` method
        :raises Exception: when the request cannot be created
        """
        req = {
            "model": self.config.model,
            "messages": [
                {
                    "role": "system",
                    "content": self.config.prompt,
                },
                *messages,
            ],
            "temperature": self.config.temperature,
            "top_p": self.config.top_p,
            "presence_penalty": self.config.presence_penalty,
            "frequency_penalty": self.config.frequency_penalty,
            "max_tokens": self.config.max_tokens,
            "seed": self.config.seed,
            "stream": True,
        }
        # Leave the key out rather than sending an explicit null.
        if tools is not None:
            req["tools"] = tools

        try:
            response = await self.client.chat.completions.create(**req)
        except Exception as e:
            raise Exception(f"CreateChatCompletionStream failed, err: {e}") from e

        full_content = ""

        async for chat_completion in response:
            # Some backends send chunks without any choice; skip them
            # instead of failing on choices[0].
            if not chat_completion.choices:
                continue
            choice = chat_completion.choices[0]
            delta = choice.delta
            content = delta.content if delta and delta.content else ""
            # Emit content update event (fire-and-forget)
            if listener and content:
                listener.emit('content_update', content)

            full_content += content
            # Check for tool calls
            if delta and delta.tool_calls:
                for tool_call in delta.tool_calls:
                    # Emit tool call event (fire-and-forget)
                    if listener:
                        listener.emit('tool_call', tool_call)

        # Emit content finished event after the loop completes
        if listener:
            listener.emit('content_finished', full_content)
#
from pathlib import Path
from ten import ExtensionTester, TenEnvTester, Cmd, CmdResult, StatusCode


class ExtensionTesterBasic(ExtensionTester):
    """Smoke test: send one ``hello_world`` cmd and stop on an OK result."""

    def check_hello(self, ten_env: TenEnvTester, result: CmdResult):
        """Stop the test once the cmd result reports ``StatusCode.OK``."""
        status_code = result.get_status_code()
        print("receive hello_world, status:" + str(status_code))

        if status_code == StatusCode.OK:
            ten_env.stop_test()

    def on_start(self, ten_env: TenEnvTester) -> None:
        """Send the cmd, then signal that start-up is complete."""
        new_cmd = Cmd.create("hello_world")

        print("send hello_world")
        # NOTE(review): the extension under test only handles "coding_text"
        # and "flush" itself; confirm something replies OK to "hello_world",
        # otherwise this test never stops.
        ten_env.send_cmd(new_cmd, self.check_hello)

        print("tester on_start_done")
        ten_env.on_start_done()


def test_basic():
    """Run the tester against the addon shipped in this package."""
    tester = ExtensionTesterBasic()
    tester.add_addon_base_dir(str(Path(__file__).resolve().parent.parent))
    # Use this package's registered addon name; the original still carried
    # the template name "default_async_extension_python".
    tester.set_test_mode_single("computer_tool_python")
    tester.run()