From cd661c7d0bf254470f2197f91c1430a42f92ca69 Mon Sep 17 00:00:00 2001
From: qinhui <>
Date: Fri, 13 Dec 2024 20:42:21 +0800
Subject: [PATCH] Add computer tool extension

---
 agents/examples/experimental/property.json    | 353 ++++++++++++++++
 .../extension/computer_tool_python/BUILD.gn   |  19 +
 .../extension/computer_tool_python/README.md  |  22 +
 .../computer_tool_python/__init__.py          |   6 +
 .../extension/computer_tool_python/addon.py   |  19 +
 .../computer_tool_python/extension.py         | 388 ++++++++++++++++++
 .../computer_tool_python/manifest.json        | 122 ++++++
 .../extension/computer_tool_python/openai.py  | 137 +++++++
 .../computer_tool_python/property.json        |   1 +
 .../computer_tool_python/tests/test_basic.py  |  36 ++
 10 files changed, 1103 insertions(+)
 create mode 100644 agents/ten_packages/extension/computer_tool_python/BUILD.gn
 create mode 100644 agents/ten_packages/extension/computer_tool_python/README.md
 create mode 100644 agents/ten_packages/extension/computer_tool_python/__init__.py
 create mode 100644 agents/ten_packages/extension/computer_tool_python/addon.py
 create mode 100644 agents/ten_packages/extension/computer_tool_python/extension.py
 create mode 100644 agents/ten_packages/extension/computer_tool_python/manifest.json
 create mode 100644 agents/ten_packages/extension/computer_tool_python/openai.py
 create mode 100644 agents/ten_packages/extension/computer_tool_python/property.json
 create mode 100644 agents/ten_packages/extension/computer_tool_python/tests/test_basic.py

diff --git a/agents/examples/experimental/property.json b/agents/examples/experimental/property.json
index eca28972..0d46e3e4 100644
--- a/agents/examples/experimental/property.json
+++ b/agents/examples/experimental/property.json
@@ -2,6 +2,359 @@
   "_ten": {
     "log_level": 3,
     "predefined_graphs": [
+      {
+        "name": "computer_tool_openai_azure",
+        "auto_start": false,
+        "nodes": [
+          {
+            "type": "extension",
+            "extension_group": "default",
+            "addon": "agora_rtc",
+            "name": "agora_rtc",
+            "property": {
+              "app_id": "${env:AGORA_APP_ID}",
+              "token": "",
+              "channel": "ten_agent_test",
+              "stream_id": 1234,
+              "remote_stream_id": 123,
+              "subscribe_audio": true,
+              "subscribe_video": true,
+              "publish_audio": true,
+              "publish_data": true,
+              "enable_agora_asr": true,
+              "agora_asr_vendor_name": "microsoft",
+              "agora_asr_language": "en-US",
+              "agora_asr_vendor_key": "${env:AZURE_STT_KEY}",
+              "agora_asr_vendor_region": "${env:AZURE_STT_REGION}",
+              "agora_asr_session_control_file_path": "session_control.conf",
+              "subscribe_video_pix_fmt": 4
+            }
+          },
+          {
+            "type": "extension",
+            "extension_group": "default",
+            "addon": "interrupt_detector",
+            "name": "interrupt_detector"
+          },
+          {
+            "type": "extension",
+            "extension_group": "chatgpt",
+            "addon": "openai_chatgpt_python",
+            "name": "openai_chatgpt",
+            "property": {
+              "base_url": "${env:OPENAI_API_BASE}",
+              "api_key": "${env:OPENAI_API_KEY}",
+              "frequency_penalty": 0.9,
+              "model": "gpt-4o",
+              "max_tokens": 512,
+              "prompt": "",
+              "proxy_url": "${env:OPENAI_PROXY_URL}",
+              "greeting": "TEN Agent connected. How can I help you today?",
+              "checking_vision_text_items": "[\"Let me take a look...\",\"Let me check your camera...\",\"Please wait for a second...\"]",
+              "max_memory_length": 10,
+              "enable_tools": true
+            }
+          },
+          {
+            "type": "extension",
+            "extension_group": "chatgpt",
+            "addon": "vision_tool_python",
+            "name": "vision_tool"
+          },
+          {
+            "type": "extension",
+            "extension_group": "chatgpt",
+            "addon": "computer_tool_python",
+            "name": "computer_tool_python",
+            "property": {
+              "base_url": "${env:OPENAI_API_BASE}",
+              "api_key": "${env:OPENAI_API_KEY}",
+              "frequency_penalty": 0.9,
+              "model": "gpt-4o",
+              "max_tokens": 512,
+              "prompt": "",
+              "proxy_url": "${env:OPENAI_PROXY_URL}",
+              "greeting": "TEN Agent connected. How can I help you today?",
+              "checking_vision_text_items": "[\"Let me take a look...\",\"Let me check your camera...\",\"Please wait for a second...\"]",
+              "max_memory_length": 10,
+              "enable_tools": true
+            }
+          },
+          {
+            "type": "extension",
+            "extension_group": "chatgpt",
+            "addon": "weatherapi_tool_python",
+            "name": "weatherapi_tool_python",
+            "property": {
+              "api_key": "${env:WEATHERAPI_API_KEY}"
+            }
+          },
+          {
+            "type": "extension",
+            "extension_group": "chatgpt",
+            "addon": "bingsearch_tool_python",
+            "name": "bingsearch_tool_python",
+            "property": {
+              "api_key": "${env:BING_API_KEY}"
+            }
+          },
+          {
+            "type": "extension",
+            "extension_group": "tts",
+            "addon": "azure_tts",
+            "name": "azure_tts",
+            "property": {
+              "azure_subscription_key": "${env:AZURE_TTS_KEY}",
+              "azure_subscription_region": "${env:AZURE_TTS_REGION}",
+              "azure_synthesis_voice_name": "en-US-AndrewMultilingualNeural"
+            }
+          },
+          {
+            "type": "extension",
+            "extension_group": "transcriber",
+            "addon": "message_collector",
+            "name": "message_collector"
+          }
+        ],
+        "connections": [
+          {
+            "extension_group": "default",
+            "extension": "agora_rtc",
+            "data": [
+              {
+                "name": "text_data",
+                "dest": [
+                  {
+                    "extension_group": "default",
+                    "extension": "interrupt_detector"
+                  },
+                  {
+                    "extension_group": "chatgpt",
+                    "extension": "openai_chatgpt"
+                  },
+                  {
+                    "extension_group": "transcriber",
+                    "extension": "message_collector"
+                  }
+                ]
+              }
+            ],
+            "video_frame": [
+              {
+                "name": "video_frame",
+                "dest": [
+                  {
+                    "extension_group": "chatgpt",
+                    "extension": "vision_tool"
+                  },
+                  {
+                    "extension_group": "chatgpt",
+                    "extension": "computer_tool_python"
+                  }
+                ]
+              }
+            ],
+            "cmd": [
+              {
+                "name": "on_user_joined",
+                "dest": [
+                  {
+                    "extension_group": "chatgpt",
+                    "extension": "openai_chatgpt"
+                  }
+                ]
+              },
+              {
+                "name": "on_user_left",
+                "dest": [
+                  {
+                    "extension_group": "chatgpt",
+                    "extension": "openai_chatgpt"
+                  }
+                ]
+              }
+            ]
+          },
+          {
+            "extension_group": "chatgpt",
+            "extension": "openai_chatgpt",
+            "data": [
+              {
+                "name": "text_data",
+                "dest": [
+                  {
+                    "extension_group": "tts",
+                    "extension": "azure_tts"
+                  },
+                  {
+                    "extension_group": "transcriber",
+                    "extension": "message_collector"
+                  }
+                ]
+              }
+            ],
+            "cmd": [
+              {
+                "name": "flush",
+                "dest": [
+                  {
+                    "extension_group": "tts",
+                    "extension": "azure_tts"
+                  }
+                ]
+              },
+              {
+                "name": "tool_call",
+                "dest": [
+                  {
+                    "extension_group": "chatgpt",
+                    "extension": "vision_tool"
+                  },
+                  {
+                    "extension_group": "chatgpt",
+                    "extension": "computer_tool_python"
+                  },
+                  {
+                    "extension_group": "chatgpt",
+                    "extension": "weatherapi_tool_python"
+                  },
+                  {
+                    "extension_group": "chatgpt",
+                    "extension": "bingsearch_tool_python"
+                  }
+                ]
+              }
+            ]
+          },
+          {
+            "extension_group": "chatgpt",
+            "extension": "computer_tool_python",
+            "data": [
+              {
+                "name": "data",
+                "dest": [
+                  {
+                    "extension_group": "default",
+                    "extension": "agora_rtc"
+                  }
+                ]
+              }
+            ],
+            "cmd": [
+              {
+                "name": "tool_register",
+                "dest": [
+                  {
+                    "extension_group": "chatgpt",
+                    "extension": "openai_chatgpt"
+                  }
+                ]
+              }
+            ]
+          },
+          {
+            "extension_group": "chatgpt",
+            "extension": "vision_tool",
+            "cmd": [
+              {
+                "name": "tool_register",
+                "dest": [
+                  {
+                    "extension_group": "chatgpt",
+                    "extension": "openai_chatgpt"
+                  }
+                ]
+              }
+            ]
+          },
+          {
+            "extension_group": "chatgpt",
+            "extension": "weatherapi_tool_python",
+            "cmd": [
+              {
+                "name": "tool_register",
+                "dest": [
+                  {
+                    "extension_group": "chatgpt",
+                    "extension": "openai_chatgpt"
+                  }
+                ]
+              }
+            ]
+          },
+          {
+            "extension_group": "chatgpt",
+            "extension": "bingsearch_tool_python",
+            "cmd": [
+              {
+                "name": "tool_register",
+                "dest": [
+                  {
+                    "extension_group": "chatgpt",
+                    "extension": "openai_chatgpt"
+                  }
+                ]
+              }
+            ]
+          },
+          {
+            "extension_group": "tts",
+            "extension": "azure_tts",
+            "audio_frame": [
+              {
+                "name": "pcm_frame",
+                "dest": [
+                  {
+                    "extension_group": "default",
+                    "extension": "agora_rtc"
+                  }
+                ]
+              }
+            ],
+            "cmd": [
+              {
+                "name": "flush",
+                "dest": [
+                  {
+                    "extension_group": "default",
+                    "extension": "agora_rtc"
+                  }
+                ]
+              }
+            ]
+          },
+          {
+            "extension_group": "transcriber",
+            "extension": "message_collector",
+            "data": [
+              {
+                "name": "data",
+                "dest": [
+                  {
+                    "extension_group": "default",
+                    "extension": "agora_rtc"
+                  }
+                ]
+              }
+            ]
+          },
+          {
+            "extension_group": "default",
+            "extension": "interrupt_detector",
+            "cmd": [
+              {
+                "name": "flush",
+                "dest": [
+                  {
+                    "extension_group": "chatgpt",
+                    "extension": "openai_chatgpt"
+                  }
+                ]
+              }
+            ]
+          }
+        ]
+      },
       {
         "name": "va_openai_azure_fashionai",
         "auto_start": false,
diff --git a/agents/ten_packages/extension/computer_tool_python/BUILD.gn b/agents/ten_packages/extension/computer_tool_python/BUILD.gn
new file mode 100644
index 00000000..8c84bd41
--- /dev/null
+++ b/agents/ten_packages/extension/computer_tool_python/BUILD.gn
@@ -0,0 +1,21 @@
+#
+# This file is part of TEN Framework, an open source project.
+# Licensed under the Apache License, Version 2.0.
+# See the LICENSE file for more information.
+#
+import("//build/feature/ten_package.gni")
+
+ten_package("computer_tool_python") {
+  package_kind = "extension"
+
+  # openai.py is imported by extension.py, so it is packaged with it.
+  resources = [
+    "__init__.py",
+    "addon.py",
+    "extension.py",
+    "manifest.json",
+    "openai.py",
+    "property.json",
+    "tests",
+  ]
+}
diff --git a/agents/ten_packages/extension/computer_tool_python/README.md b/agents/ten_packages/extension/computer_tool_python/README.md
new file mode 100644
index 00000000..f82873b8
--- /dev/null
+++ b/agents/ten_packages/extension/computer_tool_python/README.md
@@ -0,0 +1,22 @@
+# computer_tool_python
+
+This extension is a demo LLM tool for computer use.
+
+## Features
+
+- Open an application
+- Analyze code through screen sharing
+- Generate code
+- Save content to the notebook
+
+## API
+
+Refer to the `api` definition in [manifest.json](manifest.json) and the default values in [property.json](property.json).
+
+### Out:
+
+- `tool_register`: automatically registers the tools with the LLM
+
+### In:
+
+- `tool_call`: synchronous command that invokes a computer-use action
\ No newline at end of file
diff --git a/agents/ten_packages/extension/computer_tool_python/__init__.py b/agents/ten_packages/extension/computer_tool_python/__init__.py
new file mode 100644
index 00000000..72593ab2
--- /dev/null
+++ b/agents/ten_packages/extension/computer_tool_python/__init__.py
@@ -0,0 +1,6 @@
+#
+# This file is part of TEN Framework, an open source project.
+# Licensed under the Apache License, Version 2.0.
+# See the LICENSE file for more information.
+#
+from . import addon
diff --git a/agents/ten_packages/extension/computer_tool_python/addon.py b/agents/ten_packages/extension/computer_tool_python/addon.py
new file mode 100644
index 00000000..5e6ef817
--- /dev/null
+++ b/agents/ten_packages/extension/computer_tool_python/addon.py
@@ -0,0 +1,19 @@
+#
+# This file is part of TEN Framework, an open source project.
+# Licensed under the Apache License, Version 2.0.
+# See the LICENSE file for more information.
+#
+from ten import (
+    Addon,
+    register_addon_as_extension,
+    TenEnv,
+)
+from .extension import ComputerToolExtension
+
+
+@register_addon_as_extension("computer_tool_python")
+class ComputerToolExtensionAddon(Addon):
+    """Addon registered as "computer_tool_python" that creates ComputerToolExtension instances."""
+    def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None:
+        ten_env.log_info("ComputerToolExtensionAddon on_create_instance")
+        ten_env.on_create_instance_done(ComputerToolExtension(name), context)
diff --git a/agents/ten_packages/extension/computer_tool_python/extension.py b/agents/ten_packages/extension/computer_tool_python/extension.py
new file mode 100644
index 00000000..d7b0849b
--- /dev/null
+++ b/agents/ten_packages/extension/computer_tool_python/extension.py
@@ -0,0 +1,388 @@
+#
+# This file is part of TEN Framework, an open source project.
+# Licensed under the Apache License, Version 2.0.
+# See the LICENSE file for more information.
+#
+import asyncio
+import json
+from base64 import b64encode
+from io import BytesIO
+from typing import Any, Dict
+import threading
+from ten_ai_base.llm_tool import AsyncLLMToolBaseExtension
+from ten_ai_base.types import LLMToolMetadata, LLMToolMetadataParameter, LLMToolResult, LLMDataCompletionArgs
+from .openai import OpenAIChatGPT, OpenAIChatGPTConfig
+
+from PIL import Image
+from ten_ai_base.helper import AsyncEventEmitter
+
+import traceback
+from ten import (
+    AsyncTenEnv,
+    AudioFrame,
+    VideoFrame,
+    Cmd,
+    Data
+)
+
+START_APP_TOOL_NAME = "start_app"  # run_tool sends a "start_app" action for an app listed in apps_list
+START_APP_TOOL_DESCRIPTION = "start an application with name"
+# save_to_notebook: run_tool sends the tool arguments out as a "save_to_notebook" action.
+SAVE_TO_NOTEBOOK_TOOL_NAME = "save_to_notebook"
+SAVE_TO_NOTEBOOK_TOOL_DESCRIPTION = "call this whenever you need to save anything to notebook"    
+# generate_source_code: run_tool starts a coding completion from the request text alone.
+GENERATE_SOURCE_CODE_TOOL_NAME = "generate_source_code"
+GENERATE_SOURCE_CODE_TOOL_DESCRIPTION = "Use this tool whenever the user requests to generate or write source code. Examples include: 'Can you generate a function to get the current time?', 'Generate a shopping cart list using Swift.', 'Write a Python script to fetch weather data.', 'Create a Java class for a user profile.'"
+# get_source_code: run_tool starts a coding completion from the last video frame or the stored coding_text.
+GET_SOURCE_CODE_TOOL_NAME = "get_source_code"
+GET_SOURCE_CODE_TOOL_DESCRIPTION = "call this whenever you need to get source code for evaluation or refactoring, for example when user asks 'Can you help me with the code?', 'Can you help take a look at my code?', 'Take a look at my source code', You can use screenshare or not to get code"
+
+def rgb2base64jpeg(rgb_data, width, height):
+    """Encode a raw RGBA frame as a base64 PNG data URL.
+
+    Despite the historical name, the payload is PNG, not JPEG.
+
+    :param rgb_data: RGBA pixel buffer (it is decoded with mode "RGBA")
+    :param width: Frame width in pixels
+    :param height: Frame height in pixels
+    :return: A ``data:image/png;base64,...`` URL string
+    """
+    pil_image = Image.frombytes("RGBA", (width, height), bytes(rgb_data))
+    pil_image = pil_image.convert("RGB")
+
+    # Cap the larger dimension at 1080 px before encoding.
+    pil_image = resize_image_keep_aspect(pil_image, 1080)
+
+    # Encode in memory only; the old debug dump to "test.png" wrote a file
+    # into the working directory on every call and has been removed.
+    buffered = BytesIO()
+    pil_image.save(buffered, format="png")
+    base64_encoded_image = b64encode(buffered.getvalue()).decode("utf-8")
+
+    return f"data:image/png;base64,{base64_encoded_image}"
+
+
+def resize_image_keep_aspect(image, max_size=512):
+    """
+    Shrink an image so that its larger dimension equals max_size, keeping the aspect ratio.
+    An image whose width and height are both within max_size is returned unchanged.
+
+    :param image: A PIL Image object
+    :param max_size: The maximum size for the larger dimension (width or height)
+    :return: A PIL Image object (resized or original)
+    """
+    width, height = image.size
+
+    # Already small enough: hand back the original object untouched.
+    if width <= max_size and height <= max_size:
+        return image
+
+    aspect_ratio = width / height
+
+    # Pin the larger side to max_size and scale the other side to match.
+    # The scaled side is clamped to at least 1 px: int() truncates to 0 for
+    # extreme aspect ratios (e.g. 5000x1), and a 0-sized target is not a
+    # valid resize request.
+    if width > height:
+        new_width = max_size
+        new_height = max(1, int(max_size / aspect_ratio))
+    else:
+        new_height = max_size
+        new_width = max(1, int(max_size * aspect_ratio))
+
+    resized_image = image.resize((new_width, new_height))
+
+    return resized_image
+
+class ComputerToolExtension(AsyncLLMToolBaseExtension):
+    
+    def __init__(self, name: str) -> None:
+        """Set up empty state; the OpenAI client and worker loop are created in on_start."""
+        super().__init__(name)
+        self.config = None
+        self.openai_chatgpt = None
+        self.loop = None
+        self.coding_text_queue = asyncio.Queue()
+        self.max_memory_length = 10
+        self.memory = []
+        self.coding_text = ""
+        self.image_data = None
+        self.image_width = self.image_height = 0
+        self.apps_list = {
+            "WeChat": "com.tencent.xinWeChat",
+            "Xcode": "com.apple.dt.Xcode",
+            "Safari": "com.apple.Safari",
+            "Word": "com.microsoft.Word",
+            "Excel": "com.microsoft.Excel",
+            "PowerPoint": "com.microsoft.PowerPoint",
+            "Pages": "com.apple.Pages",
+            "Numbers": "com.apple.Numbers",
+            "Keynote": "com.apple.Keynote",
+            "AppleMusic": "com.apple.Music",
+            "Photos": "com.apple.Photos",
+            "Mail": "com.apple.mail",
+            "Messages": "com.apple.Messages",
+            "Calendar": "com.apple.Calendar",
+            "Notes": "com.apple.Notes",
+        }
+
+    async def on_init(self, ten_env: AsyncTenEnv) -> None:
+        ten_env.log_debug("on_init")  # trace the lifecycle step
+        await super().on_init(ten_env)  # nothing extension-specific here; defer to the base class
+
+    async def on_start(self, ten_env: AsyncTenEnv) -> None:
+        """Start the queue-draining loop thread, run the base on_start, and build the OpenAI client."""
+        ten_env.log_debug("on_start")
+        self.loop = asyncio.new_event_loop()
+
+        def start_loop():
+            asyncio.set_event_loop(self.loop)
+            self.loop.run_forever()
+
+        # Daemon thread: nothing ever stops this loop, so a non-daemon thread
+        # would keep the interpreter alive after the extension shuts down.
+        threading.Thread(target=start_loop, daemon=True).start()
+        asyncio.run_coroutine_threadsafe(self.process_coding_text_queue(ten_env), self.loop)
+        await super().on_start(ten_env)
+
+        self.config = OpenAIChatGPTConfig.create(ten_env=ten_env)
+        if not self.config.api_key:
+            # Mandatory property; without it the client is left as None.
+            ten_env.log_info("API key is missing, exiting on_start")
+            return
+        self.openai_chatgpt = OpenAIChatGPT(ten_env, self.config)
+
+    async def on_stop(self, ten_env: AsyncTenEnv) -> None:
+        ten_env.log_debug("on_stop")  # NOTE(review): the loop thread started in on_start is not stopped here
+        await super().on_stop(ten_env)  # defer to the base class
+
+    async def on_deinit(self, ten_env: AsyncTenEnv) -> None:
+        ten_env.log_debug("on_deinit")  # trace the lifecycle step
+        await super().on_deinit(ten_env)  # nothing extension-specific here; defer to the base class
+
+    async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd) -> None:  # handles "coding_text" and "flush" after the base class
+        cmd_name = cmd.get_name()
+        ten_env.log_debug("on_cmd name {}".format(cmd_name))
+        await super().on_cmd(ten_env, cmd)  # the base class sees every cmd first
+        if cmd_name == "coding_text":
+            self.coding_text = cmd.get_property_string("text")  # read later by run_tool for get_source_code
+        elif cmd_name == "flush":
+            asyncio.run_coroutine_threadsafe(self.flush_coding_text_queue(), self.loop)  # drop queued chunks on the worker loop
+
+    async def on_audio_frame(self, ten_env: AsyncTenEnv, audio_frame: AudioFrame) -> None:
+        """Log the frame name; audio is not processed yet."""
+        frame_name = audio_frame.get_name()
+        ten_env.log_debug(f"on_audio_frame name {frame_name}")
+
+        # TODO: process audio frame
+
+    async def on_video_frame(self, ten_env: AsyncTenEnv, video_frame: VideoFrame) -> None:
+        """Cache the latest frame buffer and its dimensions for get_source_code."""
+        ten_env.log_debug(f"on_video_frame name {video_frame.get_name()}")
+        # Only the most recent frame is kept; each new frame overwrites the last.
+        self.image_data = video_frame.get_buf()
+        self.image_width = video_frame.get_width()
+        self.image_height = video_frame.get_height()
+
+    def get_tool_metadata(self, ten_env: AsyncTenEnv) -> list[LLMToolMetadata]:
+        """Describe the four tools this extension offers to the LLM.
+
+        Every parameter is required. The start_app parameter description lists
+        the keys of self.apps_list because run_tool rejects any other name.
+
+        :param ten_env: Not used by this method
+        :return: Metadata for start_app, save_to_notebook, generate_source_code
+            and get_source_code, in that order
+        """
+        known_apps = ", ".join(self.apps_list)
+        return [
+            LLMToolMetadata(
+                name=START_APP_TOOL_NAME,
+                description=START_APP_TOOL_DESCRIPTION,
+                parameters=[
+                    LLMToolMetadataParameter(
+                        name="name", type="string", required=True,
+                        description=f"The application name to start, one of: {known_apps}",
+                    ),
+                ],
+            ),
+            LLMToolMetadata(
+                name=SAVE_TO_NOTEBOOK_TOOL_NAME,
+                description=SAVE_TO_NOTEBOOK_TOOL_DESCRIPTION,
+                parameters=[
+                    LLMToolMetadataParameter(
+                        name="text", type="string", required=True,
+                        description="The text to save",
+                    ),
+                ],
+            ),
+            LLMToolMetadata(
+                name=GENERATE_SOURCE_CODE_TOOL_NAME,
+                description=GENERATE_SOURCE_CODE_TOOL_DESCRIPTION,
+                parameters=[
+                    LLMToolMetadataParameter(
+                        name="name", type="string", required=True,
+                        description="What code do you want to generate?",
+                    ),
+                ],
+            ),
+            LLMToolMetadata(
+                name=GET_SOURCE_CODE_TOOL_NAME,
+                description=GET_SOURCE_CODE_TOOL_DESCRIPTION,
+                parameters=[
+                    LLMToolMetadataParameter(
+                        name="use_screenshare", type="boolean", required=True,
+                        description="use screenshare to get the code, if false, use the code from clipboard",
+                    ),
+                    LLMToolMetadataParameter(
+                        name="name", type="string", required=True,
+                        description="What do you want to do with the code?",
+                    ),
+                ],
+            ),
+        ]
+
+    async def run_tool(self, ten_env: AsyncTenEnv, name: str, args: dict) -> LLMToolResult:
+        """Dispatch an LLM tool call and return the text the LLM is told to say.
+
+        The two source-code tools start coding_completion as a background task
+        and return at once; the generated text goes out via coding_text_queue.
+
+        :param ten_env: Environment used for sending data and logging
+        :param name: Tool name, normally one of those in get_tool_metadata
+        :param args: Tool arguments supplied by the LLM
+        :return: {"content": <JSON string holding a "text" field>}
+        """
+        def reply(text: str) -> LLMToolResult:
+            return {"content": json.dumps({"text": text})}
+
+        if name == START_APP_TOOL_NAME:
+            app_name = args.get("name")
+            if app_name not in self.apps_list:
+                return reply(f"app {app_name} not found")
+            return {"content": json.dumps(await self._start_application(args, ten_env))}
+        if name == SAVE_TO_NOTEBOOK_TOOL_NAME:
+            return {"content": json.dumps(await self._save_to_notebook(args, ten_env))}
+
+        request = {"type": "text", "text": args.get("name")}
+        if name == GENERATE_SOURCE_CODE_TOOL_NAME:
+            parts = [request]
+        elif name == GET_SOURCE_CODE_TOOL_NAME:
+            if args.get("use_screenshare"):
+                if self.image_data is None:
+                    return reply("just say 'screenshare is not visible to me'")
+                image_url = rgb2base64jpeg(self.image_data, self.image_width, self.image_height)
+                parts = [{"type": "image_url", "image_url": {"url": image_url}}, request]
+            else:
+                # coding_text starts as "" rather than None, so test for emptiness;
+                # the former `is not None` check could not reach the clipboard reply.
+                if not self.coding_text:
+                    return reply("just say 'i can't find code in your clipboard'")
+                parts = [{"type": "text", "text": self.coding_text}, request]
+        else:
+            # Unknown tool: answer explicitly instead of falling through to None.
+            return reply(f"tool {name} not found")
+
+        # The duplicated "just say 'just say '" prefix of two old replies is gone.
+        asyncio.create_task(
+            self.coding_completion(ten_env, ["coding", parts, self.memory], "generate_source"))
+        return reply("just say 'the coding assistance has been provided'")
+
+    async def _start_application(self, args: dict, ten_env: AsyncTenEnv) -> Any:
+        """Send a "start_app" action for args["name"] and return the reply text."""
+        app_name = args.get("name")
+        self._send_data(ten_env, "start_app", {"bundle_id": self.apps_list.get(app_name), "app_name": app_name})
+        return {"text": f"just say '{app_name} is starting'"}
+
+    async def _save_to_notebook(self, args: dict, ten_env: AsyncTenEnv) -> Any:
+        """Send the raw tool args as a "save_to_notebook" action and return the reply text."""
+        self._send_data(ten_env, "save_to_notebook", args)
+        return {"text": "just say 'the content has been saved to notebook'"}
+
+    def _send_data(self, ten_env: AsyncTenEnv, action: str, data: Dict[str, Any]):
+        """Wrap data in a JSON "action" envelope and send it as a "data" message.
+
+        Best effort: any failure is logged as a warning and otherwise ignored.
+        """
+        try:
+            envelope = {"data_type": "action", "action": action, "data": data}
+            message = Data.create("data")
+            message.set_property_buf("data", json.dumps(envelope).encode("utf-8"))
+            ten_env.send_data(message)
+        except Exception as err:
+            ten_env.log_warn(f"send data error {err}")
+
+    async def coding_completion(self, ten_env: AsyncTenEnv, data: Any, action: str) -> None:
+        """Stream one chat completion and queue every chunk for sending.
+
+        :param ten_env: Environment used for logging
+        :param data: [task_type, content, memory]; content is the list of message
+            part dicts and memory the earlier messages
+        :param action: Action name attached to every queued chunk
+        """
+        _task_type, content, memory = data
+        try:
+            if self.openai_chatgpt is None:
+                # on_start returns before creating the client when the API key is missing.
+                ten_env.log_error("OpenAI client is not initialized, skipping coding completion")
+                return
+            message = {"role": "user", "content": content}
+            # Image parts go to the model but are left out of the stored memory.
+            non_artifact_content = [item for item in content if item.get("type") != "image_url"]
+            non_artifact_message = {"role": "user", "content": non_artifact_content}
+            response_text = ""
+            content_finished_event = asyncio.Event()
+
+            async def handle_content_update(content: str):
+                nonlocal response_text
+                try:
+                    await self.coding_text_queue.put({"text": content, "is_final": False, "action": action})
+                except Exception:
+                    ten_env.log_error(f"Error in handle_content_update: {traceback.format_exc()} for input text: {content}")
+                response_text += content
+
+            async def handle_content_finished(full_content: str):
+                try:
+                    await self.coding_text_queue.put({"text": "", "is_final": True, "action": action})
+                except Exception:
+                    ten_env.log_error(f"Error in handle_content_finished: {traceback.format_exc()} for input text: {full_content}")
+                content_finished_event.set()
+
+            listener = AsyncEventEmitter()
+            listener.on("content_update", handle_content_update)
+            listener.on("content_finished", handle_content_finished)
+
+            await self.openai_chatgpt.get_chat_completions_stream(memory + [message], None, listener)
+            await content_finished_event.wait()
+
+            self._append_memory(non_artifact_message)
+            self._append_memory({"role": "assistant", "content": response_text})
+        except asyncio.CancelledError:
+            ten_env.log_info(f"Task cancelled: {content}")
+        except Exception:
+            ten_env.log_error(f"Error in coding_completion: {traceback.format_exc()} for input text: {content}")
+        finally:
+            ten_env.log_info(f"Task completed: {content}")
+
+    def _append_memory(self, message: Dict[str, Any]):
+        """Append message, first evicting the oldest entries so at most max_memory_length are kept."""
+        del self.memory[:max(0, len(self.memory) - self.max_memory_length + 1)]  # make room for one entry
+        self.memory.append(message)
+    
+    async def flush_coding_text_queue(self):  # discards every chunk still waiting in coding_text_queue
+        while not self.coding_text_queue.empty():
+            await self.coding_text_queue.get()  # drop the chunk without sending it
+            self.coding_text_queue.task_done()  # pair each get() with task_done()
+    
+    async def process_coding_text_queue(self, ten_env: AsyncTenEnv):
+        """Poll coding_text_queue forever, sending at most one queued chunk per 0.1 s tick."""
+        while True:
+            if not self.coding_text_queue.empty():
+                # Fetch outside the try so the handler never sees an unbound item.
+                coding_text = self.coding_text_queue.get_nowait()
+                try:
+                    self._send_data(ten_env, coding_text["action"], {"text": coding_text["text"], "is_final": coding_text["is_final"]})
+                except Exception:
+                    ten_env.log_error(f"Error in process_coding_text_queue: {traceback.format_exc()} for input text: {coding_text}")
+                self.coding_text_queue.task_done()
+            await asyncio.sleep(0.1)
\ No newline at end of file
diff --git a/agents/ten_packages/extension/computer_tool_python/manifest.json b/agents/ten_packages/extension/computer_tool_python/manifest.json
new file mode 100644
index 00000000..d093b102
--- /dev/null
+++ b/agents/ten_packages/extension/computer_tool_python/manifest.json
@@ -0,0 +1,122 @@
+{
+  "type": "extension",
+  "name": "computer_tool_python",
+  "version": "0.4.2",
+  "dependencies": [
+    {
+      "type": "system",
+      "name": "ten_runtime_python",
+      "version": "0.4.2"
+    }
+  ],
+  "package": {
+    "include": [
+      "manifest.json",
+      "property.json",
+      "BUILD.gn",
+      "**.tent",
+      "**.py",
+      "README.md",
+      "tests/**"
+    ]
+  },
+  "api": {
+    "property": {
+      "api_key": {
+        "type": "string"
+      },
+      "frequency_penalty": {
+        "type": "float64"
+      },
+      "presence_penalty": {
+        "type": "float64"
+      },
+      "temperature": {
+        "type": "float64"
+      },
+      "top_p": {
+        "type": "float64"
+      },
+      "model": {
+        "type": "string"
+      },
+      "max_tokens": {
+        "type": "int64"
+      },
+      "base_url": {
+        "type": "string"
+      },
+      "prompt": {
+        "type": "string"
+      },
+      "greeting": {
+        "type": "string"
+      },
+      "proxy_url": {
+        "type": "string"
+      },
+      "max_memory_length": {
+        "type": "int64"
+      },
+      "vendor": {
+        "type": "string"
+      },
+      "azure_endpoint": {
+        "type": "string"
+      },
+      "azure_api_version": {
+        "type": "string"
+      }
+    },
+    "cmd_out": [
+      {
+        "name": "tool_register",
+        "property": {
+          "name": {
+            "type": "string"
+          },
+          "description": {
+            "type": "string"
+          },
+          "parameters": {
+            "type": "string"
+          }
+        },
+        "required": [
+          "name",
+          "description",
+          "parameters"
+        ],
+        "result": {
+          "property": {
+            "response": {
+              "type": "string"
+            }
+          }
+        }
+      }
+    ],
+    "cmd_in": [
+      {
+        "name": "tool_call",
+        "property": {
+          "name": {
+            "type": "string"
+          },
+          "args": {
+            "type": "string"
+          }
+        },
+        "required": [
+          "name"
+        ]
+      }
+    ],
+    "video_frame_in": [
+      {
+        "name": "video_frame",
+        "property": {}
+      }
+    ]
+  }
+}
\ No newline at end of file
diff --git a/agents/ten_packages/extension/computer_tool_python/openai.py b/agents/ten_packages/extension/computer_tool_python/openai.py
new file mode 100644
index 00000000..dbff6521
--- /dev/null
+++ b/agents/ten_packages/extension/computer_tool_python/openai.py
@@ -0,0 +1,137 @@
+import random
+import requests
+from openai import AsyncOpenAI
+from ten_ai_base.config import BaseConfig
+from dataclasses import dataclass
+from ten.async_ten_env import AsyncTenEnv
+
+@dataclass
+class OpenAIChatGPTConfig(BaseConfig):
+    """Connection and sampling settings for `OpenAIChatGPT`.
+
+    All fields declared here carry defaults. `model` is used for streamed completions, and `seed`
+    is forwarded with every request.
+    """
+
+    api_key: str = ""
+    base_url: str = "https://api.openai.com/v1"
+    model: str = "gpt-4o"
+    prompt: str = "You are a voice assistant who talks in a conversational way and can chat with me like my friends. I will speak to you in English or Chinese, and you will answer in the corrected and improved version of my text with the language I use. Don’t talk like a robot, instead I would like you to talk like a real human with emotions. I will use your answer for text-to-speech, so don’t return me any meaningless characters. I want you to be helpful, when I’m asking you for advice, give me precise, practical and useful advice instead of being vague. When giving me a list of options, express the options in a narrative way instead of bullet points."
+    frequency_penalty: float = 0.9
+    presence_penalty: float = 0.9
+    top_p: float = 1.0
+    temperature: float = 0.1
+    max_tokens: int = 512
+    # Evaluated once, when the class body runs: instances built without an explicit seed share this value.
+    seed: int = random.randint(0, 10000)
+    proxy_url: str = ""
+    greeting: str = "Hello, how can I help you today?"
+    max_memory_length: int = 10
+    vendor: str = "openai"
+    azure_endpoint: str = ""
+    azure_api_version: str = ""
+
+    @classmethod
+    def default_config(cls):
+        """Return a config built from the field defaults, with a freshly drawn `seed`.
+
+        Only `seed` is passed explicitly; the remaining fields fall back to their declared
+        defaults, which keeps the long prompt literal in a single place.
+        """
+        return cls(seed=random.randint(0, 10000))
+    
+
+class OpenAIChatGPT:
+    """Async chat-completion client built on `AsyncOpenAI` and driven by an `OpenAIChatGPTConfig`.
+
+    NOTE(review): `config.vendor` and the `azure_*` fields are never read in this class.
+    """
+    client = None
+
+    def __init__(self, ten_env:AsyncTenEnv, config: OpenAIChatGPTConfig):
+        self.config = config
+        # Never write the secret itself to the log; only record whether one was supplied.
+        ten_env.log_info(f"apikey {'<set>' if config.api_key else '<empty>'}, base_url {config.base_url}")
+        self.client = AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
+        # NOTE(review): a `requests.Session` carrying the proxy is attached to the client below, but
+        # nothing here shows the SDK reading `client.session` — confirm `proxy_url` takes effect.
+        self.session = requests.Session()
+        if config.proxy_url:
+            proxies = {
+                "http": config.proxy_url,
+                "https": config.proxy_url,
+            }
+            self.session.proxies.update(proxies)
+        self.client.session = self.session
+
+    async def get_chat_completions_structured(self, messages, response_format):
+        """Request a completion parsed into `response_format` and return the parsed object.
+
+        Returns None when the reply has neither parsed content nor a refusal. Raises `Exception`
+        (chained to the cause) on a refusal or when the request fails.
+        """
+        req = {
+            "model":"gpt-4o-2024-08-06",
+            "messages": [{"role": "system", "content": self.config.prompt}, *messages],
+            "temperature": self.config.temperature,
+            "top_p": self.config.top_p,
+            "presence_penalty": self.config.presence_penalty,
+            "frequency_penalty": self.config.frequency_penalty,
+            "max_tokens": self.config.max_tokens,
+            "seed": self.config.seed,
+            "response_format": response_format,
+        }
+        try:
+            completion = await self.client.beta.chat.completions.parse(**req)
+            response = completion.choices[0].message
+            if response.parsed:
+                return response.parsed
+            if response.refusal:
+                raise Exception(f"Refusal: {response.refusal}")
+            return None
+        except Exception as e:
+            raise Exception(f"CreateChatCompletionStructured failed, err: {e}") from e
+
+    async def get_chat_completions_stream(self, messages, tools = None, listener = None):
+        """Stream a chat completion and report it through `listener.emit`.
+
+        Emits 'content_update' for each non-empty text delta, 'tool_call' for each tool-call delta,
+        and 'content_finished' with the concatenated text once the stream ends. Raises `Exception`
+        (chained to the cause) if the request cannot be created.
+        """
+        req = {
+            "model": self.config.model,
+            "messages": [{"role": "system", "content": self.config.prompt}, *messages],
+            "temperature": self.config.temperature,
+            "top_p": self.config.top_p,
+            "presence_penalty": self.config.presence_penalty,
+            "frequency_penalty": self.config.frequency_penalty,
+            "max_tokens": self.config.max_tokens,
+            "seed": self.config.seed,
+            "stream": True,
+        }
+        if tools:
+            # Send `tools` only when there are some, rather than an explicit null or empty list.
+            req["tools"] = tools
+
+        try:
+            response = await self.client.chat.completions.create(**req)
+        except Exception as e:
+            raise Exception(f"CreateChatCompletionStream failed, err: {e}") from e
+
+        parts = []
+        async for chat_completion in response:
+            if not chat_completion.choices:
+                continue  # a chunk without choices has nothing to emit; indexing it would raise
+            delta = chat_completion.choices[0].delta
+            content = delta.content if delta and delta.content else ""
+            if listener and content:
+                listener.emit('content_update', content)
+            parts.append(content)
+            # The line above already allows for a missing delta; apply the same guard to tool calls.
+            for tool_call in (delta.tool_calls if delta else None) or []:
+                if listener:
+                    listener.emit('tool_call', tool_call)
+
+        if listener:
+            listener.emit('content_finished', "".join(parts))
\ No newline at end of file
diff --git a/agents/ten_packages/extension/computer_tool_python/property.json b/agents/ten_packages/extension/computer_tool_python/property.json
new file mode 100644
index 00000000..9e26dfee
--- /dev/null
+++ b/agents/ten_packages/extension/computer_tool_python/property.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/agents/ten_packages/extension/computer_tool_python/tests/test_basic.py b/agents/ten_packages/extension/computer_tool_python/tests/test_basic.py
new file mode 100644
index 00000000..c3755f44
--- /dev/null
+++ b/agents/ten_packages/extension/computer_tool_python/tests/test_basic.py
@@ -0,0 +1,36 @@
+#
+# Copyright © 2024 Agora
+# This file is part of TEN Framework, an open source project.
+# Licensed under the Apache License, Version 2.0, with certain conditions.
+# Refer to the "LICENSE" file in the root directory for more information.
+#
+from pathlib import Path
+from ten import ExtensionTester, TenEnvTester, Cmd, CmdResult, StatusCode
+
+
+class ExtensionTesterBasic(ExtensionTester):
+    """Tester that sends one `hello_world` command and stops the test on an OK result."""
+
+    def check_hello(self, ten_env: TenEnvTester, result: CmdResult):
+        """Result callback: print the status code and stop the test if it is OK."""
+        status_code = result.get_status_code()
+        print(f"receive hello_world, status:{status_code!s}")
+        if status_code == StatusCode.OK:
+            ten_env.stop_test()
+
+    def on_start(self, ten_env: TenEnvTester) -> None:
+        """Send the `hello_world` command, then report that start-up has finished."""
+        hello_cmd = Cmd.create("hello_world")
+        print("send hello_world")
+        # The bound method already has the (ten_env, result) shape the callback is invoked with.
+        ten_env.send_cmd(hello_cmd, self.check_hello)
+
+        print("tester on_start_done")
+        ten_env.on_start_done()
+
+
+def test_basic():  # Run ExtensionTesterBasic against this package in single-extension mode.
+    tester = ExtensionTesterBasic()
+    tester.add_addon_base_dir(str(Path(__file__).resolve().parent.parent))  # two levels up from this file
+    tester.set_test_mode_single("computer_tool_python")  # package name from manifest.json; must equal the registered addon name
+    tester.run()