diff --git a/agents/property.json b/agents/property.json index a9c5f805..28135587 100644 --- a/agents/property.json +++ b/agents/property.json @@ -2251,6 +2251,15 @@ "property": { "api_key": "${env:WEATHERAPI_API_KEY}" } + }, + { + "type": "extension", + "extension_group": "tools", + "addon": "bingsearch_tool_python", + "name": "bingsearch_tool_python", + "property": { + "api_key": "${env:BING_API_KEY}" + } } ], "connections": [ @@ -2284,6 +2293,21 @@ } ] }, + { + "extension_group": "tools", + "extension": "bingsearch_tool_python", + "cmd": [ + { + "name": "tool_register", + "dest": [ + { + "extension_group": "llm", + "extension": "openai_v2v_python" + } + ] + } + ] + }, { "extension_group": "llm", "extension": "openai_v2v_python", @@ -2320,11 +2344,82 @@ ] }, { - "name": "tool_call", + "name": "tool_call_get_current_weather", + "dest": [ + { + "extension_group": "tools", + "extension": "weatherapi_tool_python", + "msg_conversion": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "_ten.name", + "conversion_mode": "fixed_value", + "value": "tool_call" + } + ] + } + } + ] + }, + { + "name": "tool_call_get_past_weather", + "dest": [ + { + "extension_group": "tools", + "extension": "weatherapi_tool_python", + "msg_conversion": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "_ten.name", + "conversion_mode": "fixed_value", + "value": "tool_call" + } + ] + } + } + ] + }, + { + "name": "tool_call_get_future_weather", + "dest": [ + { + "extension_group": "tools", + "extension": "weatherapi_tool_python", + "msg_conversion": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "_ten.name", + "conversion_mode": "fixed_value", + "value": "tool_call" + } + ] + } + } + ] + }, + { + "name": "tool_call_bing_search", "dest": [ { "extension_group": "tools", - "extension": "weatherapi_tool_python" + "extension": "bingsearch_tool_python", + "msg_conversion": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "_ten.name", + "conversion_mode": "fixed_value", + "value": "tool_call" + } + ] + } } ] } diff --git a/agents/ten_packages/extension/bingsearch_tool_python/BUILD.gn b/agents/ten_packages/extension/bingsearch_tool_python/BUILD.gn new file mode 100644 index 00000000..6fe94dbb --- /dev/null +++ b/agents/ten_packages/extension/bingsearch_tool_python/BUILD.gn @@ -0,0 +1,21 @@ +# +# +# Agora Real Time Engagement +# Created by Wei Hu in 2022-11. +# Copyright (c) 2024 Agora IO. All rights reserved. +# +# +import("//build/feature/ten_package.gni") + +ten_package("bingsearch_tool_python") { + package_kind = "extension" + + resources = [ + "__init__.py", + "addon.py", + "extension.py", + "log.py", + "manifest.json", + "property.json", + ] +} diff --git a/agents/ten_packages/extension/bingsearch_tool_python/README.md b/agents/ten_packages/extension/bingsearch_tool_python/README.md new file mode 100644 index 00000000..581fdf5e --- /dev/null +++ b/agents/ten_packages/extension/bingsearch_tool_python/README.md @@ -0,0 +1,29 @@ +# bingsearch_tool_python + + + +## Features + + + +- xxx feature + +## API + +Refer to `api` definition in [manifest.json] and default values in [property.json](property.json). + + + +## Development + +### Build + + + +### Unit test + + + +## Misc + + diff --git a/agents/ten_packages/extension/bingsearch_tool_python/__init__.py b/agents/ten_packages/extension/bingsearch_tool_python/__init__.py new file mode 100644 index 00000000..57f5e81b --- /dev/null +++ b/agents/ten_packages/extension/bingsearch_tool_python/__init__.py @@ -0,0 +1,11 @@ +# +# +# Agora Real Time Engagement +# Created by Wei Hu in 2024-08. +# Copyright (c) 2024 Agora IO. All rights reserved. +# +# +from . import addon +from .log import logger + +logger.info("bingsearch_tool_python extension loaded") diff --git a/agents/ten_packages/extension/bingsearch_tool_python/addon.py b/agents/ten_packages/extension/bingsearch_tool_python/addon.py new file mode 100644 index 00000000..b2a6e95e --- /dev/null +++ b/agents/ten_packages/extension/bingsearch_tool_python/addon.py @@ -0,0 +1,22 @@ +# +# +# Agora Real Time Engagement +# Created by Wei Hu in 2024-08. +# Copyright (c) 2024 Agora IO. All rights reserved. +# +# +from ten import ( + Addon, + register_addon_as_extension, + TenEnv, +) +from .extension import BingSearchToolExtension +from .log import logger + + +@register_addon_as_extension("bingsearch_tool_python") +class BingSearchToolExtensionAddon(Addon): + + def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None: + logger.info("BingSearchToolExtensionAddon on_create_instance") + ten_env.on_create_instance_done(BingSearchToolExtension(name), context) diff --git a/agents/ten_packages/extension/bingsearch_tool_python/extension.py b/agents/ten_packages/extension/bingsearch_tool_python/extension.py new file mode 100644 index 00000000..b737a677 --- /dev/null +++ b/agents/ten_packages/extension/bingsearch_tool_python/extension.py @@ -0,0 +1,193 @@ +# +# +# Agora Real Time Engagement +# Created by Wei Hu in 2024-08. +# Copyright (c) 2024 Agora IO. All rights reserved. +# +# +import json +import requests +from typing import Any, List + +from ten import ( + AudioFrame, + VideoFrame, + Extension, + TenEnv, + Cmd, + StatusCode, + CmdResult, + Data, +) +from .log import logger + +CMD_TOOL_REGISTER = "tool_register" +CMD_TOOL_CALL = "tool_call" +CMD_PROPERTY_NAME = "name" +CMD_PROPERTY_ARGS = "args" + +TOOL_REGISTER_PROPERTY_NAME = "name" +TOOL_REGISTER_PROPERTY_DESCRIPTON = "description" +TOOL_REGISTER_PROPERTY_PARAMETERS = "parameters" +TOOL_CALLBACK = "callback" + +TOOL_NAME = "bing_search" +TOOL_DESCRIPTION = "Use Bing.com to search for latest information. Call this function if you are not sure about the answer." +TOOL_PARAMETERS = { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The search query to call Bing Search." + } + }, + "required": ["query"], + } + +PROPERTY_API_KEY = "api_key" # Required + +DEFAULT_BING_SEARCH_ENDPOINT = "https://api.bing.microsoft.com/v7.0/search" + +# BING_SEARCH_ENDPOINT is the default endpoint for Bing Web Search API. +# Currently There are two web-based Bing Search services available on Azure, +# i.e. Bing Web Search[1] and Bing Custom Search[2]. Compared to Bing Custom Search, +# Both services that provides a wide range of search results, while Bing Custom +# Search requires you to provide an additional custom search instance, `customConfig`. +# Both services are available for BingSearchAPIWrapper. +# History of Azure Bing Search API: +# Before shown in Azure Marketplace as a separate service, Bing Search APIs were +# part of Azure Cognitive Services, the endpoint of which is unique, and the user +# must specify the endpoint when making a request. After transitioning to Azure +# Marketplace, the endpoint is standardized and the user does not need to specify +# the endpoint[3]. +# Reference: +# 1. https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/overview +# 2. https://learn.microsoft.com/en-us/bing/search-apis/bing-custom-search/overview +# 3. https://azure.microsoft.com/en-in/updates/bing-search-apis-will-transition-from-azure-cognitive-services-to-azure-marketplace-on-31-october-2023/ + +class BingSearchToolExtension(Extension): + api_key: str = "" + tools: dict = {} + k: int = 10 + + def on_init(self, ten_env: TenEnv) -> None: + logger.info("BingSearchToolExtension on_init") + self.tools = { + TOOL_NAME: { + TOOL_REGISTER_PROPERTY_NAME: TOOL_NAME, + TOOL_REGISTER_PROPERTY_DESCRIPTON: TOOL_DESCRIPTION, + TOOL_REGISTER_PROPERTY_PARAMETERS: TOOL_PARAMETERS, + TOOL_CALLBACK: self._do_search + } + } + + ten_env.on_init_done() + + def on_start(self, ten_env: TenEnv) -> None: + logger.info("BingSearchToolExtension on_start") + + try: + api_key = ten_env.get_property_string(PROPERTY_API_KEY) + self.api_key = api_key + except Exception as err: + logger.info( + f"GetProperty required {PROPERTY_API_KEY} failed, err: {err}") + return + + # Register func + for name, tool in self.tools.items(): + c = Cmd.create(CMD_TOOL_REGISTER) + c.set_property_string(TOOL_REGISTER_PROPERTY_NAME, name) + c.set_property_string(TOOL_REGISTER_PROPERTY_DESCRIPTON, tool[TOOL_REGISTER_PROPERTY_DESCRIPTON]) + c.set_property_string(TOOL_REGISTER_PROPERTY_PARAMETERS, json.dumps(tool[TOOL_REGISTER_PROPERTY_PARAMETERS])) + ten_env.send_cmd(c, lambda ten, result: logger.info(f"register done, {result}")) + + ten_env.on_start_done() + + def on_stop(self, ten_env: TenEnv) -> None: + logger.info("BingSearchToolExtension on_stop") + + # TODO: clean up resources + + ten_env.on_stop_done() + + def on_deinit(self, ten_env: TenEnv) -> None: + logger.info("BingSearchToolExtension on_deinit") + ten_env.on_deinit_done() + + def on_cmd(self, ten_env: TenEnv, cmd: Cmd) -> None: + cmd_name = cmd.get_name() + logger.info(f"on_cmd name {cmd_name} {cmd.to_json()}") + + # FIXME need to handle async + try: + name = cmd.get_property_string(CMD_PROPERTY_NAME) + if name in self.tools: + try: + tool = self.tools[name] + args = cmd.get_property_string(CMD_PROPERTY_ARGS) + arg_dict = json.loads(args) + logger.info(f"before callback {name}") + resp = tool[TOOL_CALLBACK](arg_dict) + logger.info(f"after callback {resp}") + cmd_result = CmdResult.create(StatusCode.OK) + cmd_result.set_property_string("response", json.dumps(resp)) + ten_env.return_result(cmd_result, cmd) + return + except: + logger.exception("Failed to callback") + cmd_result = CmdResult.create(StatusCode.ERROR) + ten_env.return_result(cmd_result, cmd) + return + else: + logger.error(f"unknown tool name {name}") + except: + logger.exception("Failed to get tool name") + cmd_result = CmdResult.create(StatusCode.ERROR) + ten_env.return_result(cmd_result, cmd) + return + + cmd_result = CmdResult.create(StatusCode.OK) + ten_env.return_result(cmd_result, cmd) + + def on_data(self, ten_env: TenEnv, data: Data) -> None: + pass + + def on_audio_frame(self, ten_env: TenEnv, audio_frame: AudioFrame) -> None: + pass + + def on_video_frame(self, ten_env: TenEnv, video_frame: VideoFrame) -> None: + pass + + def _do_search(self, args:dict) -> Any: + if "query" not in args: + raise Exception("Failed to get property") + + query = args["query"] + snippets = [] + results = self._bing_search_results(query, count=self.k) + if len(results) == 0: + return "No good Bing Search Result was found" + for result in results: + snippets.append(result["snippet"]) + + return snippets + + def _bing_search_results(self, search_term: str, count: int) -> List[dict]: + headers = {"Ocp-Apim-Subscription-Key": self.api_key} + params = { + "q": search_term, + "count": count, + "textDecorations": True, + "textFormat": "HTML" + } + response = requests.get( + DEFAULT_BING_SEARCH_ENDPOINT, + headers=headers, + params=params, # type: ignore + ) + response.raise_for_status() + search_results = response.json() + if "webPages" in search_results: + return search_results["webPages"]["value"] + return [] \ No newline at end of file diff --git a/agents/ten_packages/extension/bingsearch_tool_python/log.py b/agents/ten_packages/extension/bingsearch_tool_python/log.py new file mode 100644 index 00000000..82cb60b5 --- /dev/null +++ b/agents/ten_packages/extension/bingsearch_tool_python/log.py @@ -0,0 +1,22 @@ +# +# +# Agora Real Time Engagement +# Created by Wei Hu in 2024-08. +# Copyright (c) 2024 Agora IO. All rights reserved. +# +# +import logging + +logger = logging.getLogger("bingsearch_tool_python") +logger.setLevel(logging.INFO) + +formatter_str = ( + "%(asctime)s - %(name)s - %(levelname)s - %(process)d - " + "[%(filename)s:%(lineno)d] - %(message)s" +) +formatter = logging.Formatter(formatter_str) + +console_handler = logging.StreamHandler() +console_handler.setFormatter(formatter) + +logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/bingsearch_tool_python/manifest.json b/agents/ten_packages/extension/bingsearch_tool_python/manifest.json new file mode 100644 index 00000000..3e5a4193 --- /dev/null +++ b/agents/ten_packages/extension/bingsearch_tool_python/manifest.json @@ -0,0 +1,23 @@ +{ + "type": "extension", + "name": "bingsearch_tool_python", + "version": "0.1.0", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_python", + "version": "0.2" + } + ], + "package": { + "include": [ + "manifest.json", + "property.json", + "BUILD.gn", + "**.tent", + "**.py", + "README.md" + ] + }, + "api": {} +} \ No newline at end of file diff --git a/agents/ten_packages/extension/bingsearch_tool_python/property.json b/agents/ten_packages/extension/bingsearch_tool_python/property.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/agents/ten_packages/extension/bingsearch_tool_python/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/ten_packages/extension/openai_v2v_python/README.md b/agents/ten_packages/extension/openai_v2v_python/README.md index 80cc9cc5..3cd294f3 100644 --- a/agents/ten_packages/extension/openai_v2v_python/README.md +++ b/agents/ten_packages/extension/openai_v2v_python/README.md @@ -48,3 +48,18 @@ Refer to `api` definition in [manifest.json] and default values in [property.jso | **Name** | **Description** | |------------------|-------------------------------------------| | `pcm_frame` | Audio frame output after voice processing | + + +### Azure Support + +This extension also support Azure OpenAI Service, the propoerty settings are as follow: + +``` json +{ + "base_uri": "wss://xxx.openai.azure.com", + "path": "/openai/realtime?api-version=xxx&deployment=xxx", + "api_key": "xxx", + "model": "gpt-4o-realtime-preview", + "vendor": "azure" +} +``` \ No newline at end of file diff --git a/agents/ten_packages/extension/openai_v2v_python/conf.py b/agents/ten_packages/extension/openai_v2v_python/conf.py index 2068e957..d72d1766 100644 --- a/agents/ten_packages/extension/openai_v2v_python/conf.py +++ b/agents/ten_packages/extension/openai_v2v_python/conf.py @@ -10,6 +10,7 @@ You should start by saying '{greeting}' using {language}. If interacting is not in {language}, start by using the standard accent or dialect familiar to the user. Talk quickly. Do not refer to these rules, even if you're asked about them. +{tools} ''' class RealtimeApiConfig: @@ -22,10 +23,10 @@ def __init__( model: str=DEFAULT_MODEL, language: str = "en-US", instruction: str = BASIC_PROMPT, - temperature: float =0.5, + temperature: float = 0.5, max_tokens: int = 1024, voice: Voices = Voices.Alloy, - server_vad:bool=True, + server_vad:bool=True ): self.base_uri = base_uri self.api_key = api_key diff --git a/agents/ten_packages/extension/openai_v2v_python/extension.py b/agents/ten_packages/extension/openai_v2v_python/extension.py index 5436a8b9..57ee3945 100644 --- a/agents/ten_packages/extension/openai_v2v_python/extension.py +++ b/agents/ten_packages/extension/openai_v2v_python/extension.py @@ -33,6 +33,9 @@ # properties PROPERTY_API_KEY = "api_key" # Required +PROPERTY_BASE_URI = "base_uri" # Optional +PROPERTY_PATH = "path" # Optional +PROPERTY_VENDOR = "vendor" # Optional PROPERTY_MODEL = "model" # Optional PROPERTY_SYSTEM_MESSAGE = "system_message" # Optional PROPERTY_TEMPERATURE = "temperature" # Optional @@ -86,7 +89,8 @@ def __init__(self, name: str): self.transcript: str = '' # misc. - self.greeting = DEFAULT_GREETING + self.greeting : str = DEFAULT_GREETING + self.vendor: str = "" # max history store in context self.max_history = 0 self.history = [] @@ -109,7 +113,7 @@ def start_event_loop(loop): target=start_event_loop, args=(self.loop,)) self.thread.start() - self._register_local_tools() + # self._register_local_tools() asyncio.run_coroutine_threadsafe(self._init_connection(), self.loop) @@ -173,7 +177,7 @@ def on_config_changed(self) -> None: async def _init_connection(self): try: self.conn = RealtimeApiConnection( - base_uri=self.config.base_uri, api_key=self.config.api_key, model=self.config.model, verbose=True) + base_uri=self.config.base_uri, path=self.config.path, api_key=self.config.api_key, model=self.config.model, vendor=self.vendor, verbose=True) logger.info(f"Finish init client {self.config} {self.conn}") except: logger.exception(f"Failed to create client {self.config}") @@ -221,7 +225,8 @@ def get_time_ms() -> int: f"On request transcript failed {message.item_id} {message.error}") case ItemCreated(): logger.info(f"On item created {message.item}") - if self.max_history and message.item["status"] == "completed": + + if self.max_history and ("status" not in message.item or message.item["status"] == "completed"): # need maintain the history await self._append_history(message.item) case ResponseCreated(): @@ -343,6 +348,25 @@ def _fetch_properties(self, ten_env: TenEnv): f"GetProperty required {PROPERTY_API_KEY} failed, err: {err}") return + try: + base_uri = ten_env.get_property_string(PROPERTY_BASE_URI) + if base_uri: + self.config.base_uri = base_uri + except Exception as err: + logger.info(f"GetProperty optional {PROPERTY_BASE_URI} error: {err}") + + try: + path = ten_env.get_property_string(PROPERTY_PATH) + if path: + self.config.path = path + except Exception as err: + logger.info(f"GetProperty optional {PROPERTY_PATH} error: {err}") + + try: + self.vendor = ten_env.get_property_string(PROPERTY_VENDOR) + except Exception as err: + logger.info(f"GetProperty optional {PROPERTY_VENDOR} error: {err}") + try: model = ten_env.get_property_string(PROPERTY_MODEL) if model: @@ -432,6 +456,7 @@ def _fetch_properties(self, ten_env: TenEnv): self.ctx["greeting"] = self.greeting def _update_session(self) -> SessionUpdate: + self.ctx["tools"] = self.registry.to_prompt() prompt = self._replace(self.config.instruction) self.last_updated = datetime.now() return SessionUpdate(session=SessionUpdateParams( @@ -510,8 +535,8 @@ def _dump_audio_if_need(self, buf: bytearray, role: Role) -> None: with open("{}_{}.pcm".format(role, self.channel_name), "ab") as dump_file: dump_file.write(buf) - def _register_local_tools(self) -> None: - self.ctx["tools"] = self.registry.to_prompt() + #def _register_local_tools(self) -> None: + # self.ctx["tools"] = self.registry.to_prompt() def _on_tool_register(self, ten_env: TenEnv, cmd: Cmd): try: @@ -532,7 +557,7 @@ def _on_tool_register(self, ten_env: TenEnv, cmd: Cmd): async def _remote_tool_call(self, ten_env: TenEnv, name: str, args: str, callback: Awaitable): logger.info(f"_remote_tool_call {name} {args}") - c = Cmd.create(CMD_TOOL_CALL) + c = Cmd.create(f"{CMD_TOOL_CALL}_{name}") c.set_property_string(CMD_PROPERTY_NAME, name) c.set_property_string(CMD_PROPERTY_ARGS, args) ten_env.send_cmd(c, lambda ten, result: asyncio.run_coroutine_threadsafe( diff --git a/agents/ten_packages/extension/openai_v2v_python/manifest.json b/agents/ten_packages/extension/openai_v2v_python/manifest.json index 898c1b87..55b91ece 100644 --- a/agents/ten_packages/extension/openai_v2v_python/manifest.json +++ b/agents/ten_packages/extension/openai_v2v_python/manifest.json @@ -26,6 +26,15 @@ "api_key": { "type": "string" }, + "base_uri": { + "type": "string" + }, + "path": { + "type": "string" + }, + "vendor": { + "type": "string" + }, "temperature": { "type": "float64" }, diff --git a/agents/ten_packages/extension/openai_v2v_python/realtime/connection.py b/agents/ten_packages/extension/openai_v2v_python/realtime/connection.py index 1f8fbdfc..94e81c79 100644 --- a/agents/ten_packages/extension/openai_v2v_python/realtime/connection.py +++ b/agents/ten_packages/extension/openai_v2v_python/realtime/connection.py @@ -11,6 +11,8 @@ DEFAULT_VIRTUAL_MODEL = "gpt-4o-realtime-preview" +VENDOR_AZURE = "azure" + def smart_str(s: str, max_field_len: int = 128) -> str: """parse string as json, truncate data field to 128 characters, reserialize""" try: @@ -36,10 +38,12 @@ def __init__( api_key: str | None = None, path: str = "/v1/realtime", model: str = DEFAULT_VIRTUAL_MODEL, + vendor: str = "", verbose: bool = False, ): + self.vendor = vendor self.url = f"{base_uri}{path}" - if "model=" not in self.url: + if not self.vendor and "model=" not in self.url: self.url += f"?model={model}" self.api_key = api_key or os.environ.get("OPENAI_API_KEY") @@ -56,9 +60,13 @@ async def __aexit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> bool return False async def connect(self): - auth = aiohttp.BasicAuth("", self.api_key) if self.api_key else None - - headers = {"OpenAI-Beta": "realtime=v1"} + headers = {} + auth = None + if self.vendor == VENDOR_AZURE: + headers = {"api-key": self.api_key} + elif not self.vendor: + auth = aiohttp.BasicAuth("", self.api_key) if self.api_key else None + headers = {"OpenAI-Beta": "realtime=v1"} self.websocket = await self.session.ws_connect( url=self.url, @@ -98,8 +106,8 @@ async def listen(self) -> AsyncGenerator[ServerToClientMessage, None]: def handle_server_message(self, message: str) -> ServerToClientMessage: try: return parse_server_message(message) - except Exception as e: - logger.error("Error handling message: " + str(e)) + except: + logger.exception("Error handling message") async def close(self): # Close the websocket connection if it exists diff --git a/agents/ten_packages/extension/openai_v2v_python/realtime/struct.py b/agents/ten_packages/extension/openai_v2v_python/realtime/struct.py index 78ba077a..d1c81efc 100644 --- a/agents/ten_packages/extension/openai_v2v_python/realtime/struct.py +++ b/agents/ten_packages/extension/openai_v2v_python/realtime/struct.py @@ -454,6 +454,7 @@ class ResponseContentPartAdded(ServerToClientMessage): output_index: int # Index of the output item in the response content_index: int # Index of the content part in the output part: Union[ItemParam, None] # The added content part + content: Union[ItemParam, None] = None # The added content part for azure type: str = EventType.RESPONSE_CONTENT_PART_ADDED # Fixed event type @dataclass @@ -463,6 +464,7 @@ class ResponseContentPartDone(ServerToClientMessage): output_index: int # Index of the output item in the response content_index: int # Index of the content part in the output part: Union[ItemParam, None] # The content part that was completed + content: Union[ItemParam, None] = None # The added content part for azure type: str = EventType.RESPONSE_CONTENT_PART_ADDED # Fixed event type @dataclass