from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

if TYPE_CHECKING:
    # Only needed for the annotation on ``gather_usage_summary``; guarding the
    # import keeps this helper usable without importing ``autogen`` at runtime.
    from autogen import Agent


def gather_usage_summary(agents: List["Agent"]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Gather usage summary from all agents.

    Args:
        agents (list): List of agents. An agent whose ``client`` attribute is
            missing or ``None`` contributes nothing to either summary.

    Returns:
        tuple: (total_usage_summary, actual_usage_summary)

    Example return:
        total_usage_summary = {
            'total_cost': 0.0006090000000000001,
            'gpt-35-turbo': {
                'cost': 0.0006090000000000001,
                'prompt_tokens': 242,
                'completion_tokens': 123,
                'total_tokens': 365
            }
        }
        `actual_usage_summary` follows the same format.
        If none of the agents incurred any cost (not having a client), then the
        total_usage_summary and actual_usage_summary will be {'total_cost': 0}.
    """
    # Per-model counters that are summed across agents.
    usage_fields = ("cost", "prompt_tokens", "completion_tokens", "total_tokens")

    def aggregate_summary(usage_summary: Dict[str, Any], agent_summary: Optional[Dict[str, Any]]) -> None:
        """Add one client's summary (possibly ``None``) into ``usage_summary`` in place."""
        if agent_summary is None:
            return
        usage_summary["total_cost"] += agent_summary.get("total_cost", 0)
        for model, data in agent_summary.items():
            if model == "total_cost":
                continue
            if model not in usage_summary:
                # Copy so that later additions never write into the client's own dict.
                usage_summary[model] = data.copy()
                continue
            merged = usage_summary[model]
            for field in usage_fields:
                # ``get`` on both sides: the first-seen entry may lack a counter.
                merged[field] = merged.get(field, 0) + data.get(field, 0)

    total_usage_summary: Dict[str, Any] = {"total_cost": 0}
    actual_usage_summary: Dict[str, Any] = {"total_cost": 0}

    for agent in agents:
        # Not every agent is guaranteed to expose a ``client`` attribute.
        client = getattr(agent, "client", None)
        if client is None:
            continue
        aggregate_summary(total_usage_summary, client.total_usage_summary)
        aggregate_summary(actual_usage_summary, client.actual_usage_summary)

    return total_usage_summary, actual_usage_summary
def print_usage_summary(self, mode: Union[str, List[str], None] = None) -> None:
    """Print the usage summary of this agent's client.

    Args:
        mode: Forwarded unchanged to ``self.client.print_usage_summary``.
            When omitted, a fresh ``["actual", "total"]`` list is used, so no
            list object is shared between calls or with the client.

    Prints a one-line notice instead when the agent has no client.
    """
    if mode is None:
        # Built per call rather than as a default argument value.
        mode = ["actual", "total"]
    if self.client is None:
        print(f"No cost incurred from agent '{self.name}'.")
        return
    print(f"Agent '{self.name}':")
    self.client.print_usage_summary(mode)


def get_actual_usage(self) -> Union[None, Dict[str, Union[float, Dict[str, float]]]]:
    """Get the actual usage summary.

    Returns:
        ``self.client.actual_usage_summary`` (the client's own object, not a
        copy), or ``None`` when the agent has no client.
    """
    if self.client is None:
        return None
    return self.client.actual_usage_summary


def get_total_usage(self) -> Union[None, Dict[str, Union[float, Dict[str, float]]]]:
    """Get the total usage summary.

    Returns:
        ``self.client.total_usage_summary`` (the client's own object, not a
        copy), or ``None`` when the agent has no client.
    """
    if self.client is None:
        return None
    return self.client.total_usage_summary
a/notebook/oai_client_cost.ipynb +++ b/notebook/agentchat_cost_token_tracking.ipynb @@ -15,7 +15,8 @@ "\n", "Licensed under the MIT License.\n", "\n", - "# Use AutoGen's OpenAIWrapper for cost estimation\n", + "# Usage tracking with AutoGen\n", + "## 1. Use AutoGen's OpenAIWrapper for cost estimation\n", "The `OpenAIWrapper` from `autogen` tracks token counts and costs of your API calls. Use the `create()` method to initiate requests and `print_usage_summary()` to retrieve a detailed usage report, including total cost and token usage for both cached and actual requests.\n", "\n", "- `mode=[\"actual\", \"total\"]` (default): print usage summary for non-caching completions and all completions (including cache).\n", @@ -24,6 +25,17 @@ "\n", "Reset your session's usage data with `clear_usage_summary()` when needed.\n", "\n", + "## 2. Track cost and token count for agents\n", + "We also support cost estimation for agents. Use `Agent.print_usage_summary()` to print the cost summary for the agent.\n", + "You can retrieve the usage summary as a dict using `Agent.get_actual_usage()` and `Agent.get_total_usage()`. Note that `Agent.reset()` will also reset the usage summary.\n", + "\n", + "To gather usage data for a list of agents, we provide a utility function `autogen.agent_utils.gather_usage_summary(agents)` where you pass in a list of agents and gather the usage summary.\n", + "\n", + "## Caution when using Azure OpenAI!\n", + "If you are using Azure OpenAI, the model returned from completion doesn't have the version information. The returned model is either 'gpt-35-turbo' or 'gpt-4'. From there, we are calculating the cost based on gpt-3.5-turbo-0613: ((0.0015, 0.002) per 1k prompt and completion tokens) and gpt-4-0613: (0.03, 0.06). This means the cost is wrong if you are using the 1106 version of the models from Azure OpenAI.\n", + "\n", + "This will be improved in the future. However, the token count summary is accurate. 
You can use the token count to calculate the cost yourself.\n", + "\n", "## Requirements\n", "\n", "AutoGen requires `Python>=3.8`:\n", @@ -43,12 +55,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import autogen\n", "from autogen import OpenAIWrapper\n", + "from autogen import AssistantAgent, UserProxyAgent\n", + "from autogen.agent_utils import gather_usage_summary\n", "\n", "# config_list = autogen.config_list_from_json(\n", "# \"OAI_CONFIG_LIST\",\n", @@ -79,7 +93,7 @@ " \"api_key\": \"\",\n", " }, # OpenAI API endpoint for gpt-4\n", " {\n", - " \"model\": \"gpt-35-turbo-0631\", # 0631 or newer is needed to use functions\n", + " \"model\": \"gpt-35-turbo-0613\", # 0613 or newer is needed to use functions\n", " \"base_url\": \"\", \n", " \"api_type\": \"azure\", \n", " \"api_version\": \"2023-08-01-preview\", # 2023-07-01-preview or newer is needed to use functions\n", @@ -100,15 +114,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "In update_usage_summary\n", - "0.0001555\n" + "0.0003535\n" ] } ], @@ -117,7 +130,7 @@ "messages = [\n", " {\"role\": \"user\", \"content\": \"Can you give me 3 useful tips on learning Python? Keep it simple and short.\"},\n", "]\n", - "response = client.create(messages=messages, model=\"gpt-35-turbo-1106\", cache_seed=None)\n", + "response = client.create(messages=messages, model=\"gpt-3.5-turbo\", cache_seed=None)\n", "print(response.cost)" ] }, @@ -125,7 +138,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Usage Summary\n", + "## Usage Summary for OpenAIWrapper\n", "\n", "When creating a instance of OpenAIWrapper, cost of all completions from the same instance is recorded. You can call `print_usage_summary()` to checkout your usage summary. 
To clear up, use `clear_usage_summary()`.\n" ] @@ -283,6 +296,209 @@ "response = client.create(messages=messages, model=\"gpt-35-turbo-1106\", cache_seed=41)\n", "client.print_usage_summary()" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage Summary for Agents\n", + "\n", + "- `Agent.print_usage_summary()` will print the cost summary for the agent.\n", + "- `Agent.get_actual_usage()` and `Agent.get_total_usage()` will return the usage summary in a dict. When an agent doesn't use LLM, they will return None.\n", + "- `Agent.reset()` will reset the usage summary.\n", + "- `autogen.agent_utils.gather_usage_summary` will gather the usage summary for a list of agents." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mai_user\u001b[0m (to assistant):\n", + "\n", + "$x^3=125$. What is x?\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33massistant\u001b[0m (to ai_user):\n", + "\n", + "To find the value of x, we need to find the cube root of 125. \n", + "\n", + "The cube root of 125 is 5. \n", + "\n", + "Therefore, x = 5.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mai_user\u001b[0m (to assistant):\n", + "\n", + "Great job! Your answer is correct.\n", + "\n", + "Indeed, to find the value of x in the equation $x^3 = 125$, we need to find the cube root of 125. The cube root of 125 is indeed 5.\n", + "\n", + "Therefore, x = 5 is the correct solution. Well done!\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33massistant\u001b[0m (to ai_user):\n", + "\n", + "Thank you! I'm glad I could assist you. 
If you have any more questions, feel free to ask.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "\n", + "assistant = AssistantAgent(\n", + " \"assistant\",\n", + " system_message=\"You are a helpful assistant.\",\n", + " llm_config={\n", + " \"timeout\": 600,\n", + " \"cache_seed\": None,\n", + " \"config_list\": config_list,\n", + " },\n", + ")\n", + "\n", + "ai_user_proxy = UserProxyAgent(\n", + " name=\"ai_user\",\n", + " human_input_mode=\"NEVER\",\n", + " max_consecutive_auto_reply=1,\n", + " code_execution_config=False,\n", + " llm_config={\n", + " \"config_list\": config_list,\n", + " },\n", + " # In the system message the \"user\" always refers to the other agent.\n", + " system_message=\"You ask a user for help. You check the answer from the user and provide feedback.\",\n", + ")\n", + "assistant.reset()\n", + "\n", + "math_problem = \"$x^3=125$. What is x?\"\n", + "ai_user_proxy.initiate_chat(\n", + " assistant,\n", + " message=math_problem,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Agent 'ai_user':\n", + "----------------------------------------------------------------------------------------------------\n", + "Usage summary excluding cached usage: \n", + "Total cost: 0.00025\n", + "* Model 'gpt-35-turbo': cost: 0.00025, prompt_tokens: 80, completion_tokens: 63, total_tokens: 143\n", + "\n", + "All completions are non-cached: the total cost with cached completions is the same as actual cost.\n", + "----------------------------------------------------------------------------------------------------\n", + "\n", + "Agent 'assistant':\n", + "----------------------------------------------------------------------------------------------------\n", + "Usage summary excluding cached usage: \n", + "Total cost: 0.00036\n", + "* Model 'gpt-35-turbo': cost: 
0.00036, prompt_tokens: 162, completion_tokens: 60, total_tokens: 222\n", + "\n", + "All completions are non-cached: the total cost with cached completions is the same as actual cost.\n", + "----------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "ai_user_proxy.print_usage_summary()\n", + "print()\n", + "assistant.print_usage_summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No cost incurred from agent 'user'.\n" + ] + } + ], + "source": [ + "user_proxy = UserProxyAgent(\n", + " name=\"user\",\n", + " human_input_mode=\"NEVER\",\n", + " max_consecutive_auto_reply=2,\n", + " code_execution_config=False,\n", + " default_auto_reply=\"That's all. Thank you.\",\n", + ")\n", + "user_proxy.print_usage_summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Actual usage summary for assistant (excluding completion from cache): {'total_cost': 0.00036300000000000004, 'gpt-35-turbo': {'cost': 0.00036300000000000004, 'prompt_tokens': 162, 'completion_tokens': 60, 'total_tokens': 222}}\n", + "Total usage summary for assistant (including completion from cache): {'total_cost': 0.00036300000000000004, 'gpt-35-turbo': {'cost': 0.00036300000000000004, 'prompt_tokens': 162, 'completion_tokens': 60, 'total_tokens': 222}}\n", + "Actual usage summary for ai_user_proxy: {'total_cost': 0.000246, 'gpt-35-turbo': {'cost': 0.000246, 'prompt_tokens': 80, 'completion_tokens': 63, 'total_tokens': 143}}\n", + "Total usage summary for ai_user_proxy: {'total_cost': 0.000246, 'gpt-35-turbo': {'cost': 0.000246, 'prompt_tokens': 80, 'completion_tokens': 63, 'total_tokens': 143}}\n", + "Actual usage summary for user_proxy: None\n", + "Total usage summary for user_proxy: None\n" + ] + } + ], + 
"source": [ + "print(\"Actual usage summary for assistant (excluding completion from cache):\", assistant.get_actual_usage())\n", + "print(\"Total usage summary for assistant (including completion from cache):\", assistant.get_total_usage())\n", + "\n", + "print(\"Actual usage summary for ai_user_proxy:\", ai_user_proxy.get_actual_usage())\n", + "print(\"Total usage summary for ai_user_proxy:\", ai_user_proxy.get_total_usage())\n", + "\n", + "print(\"Actual usage summary for user_proxy:\", user_proxy.get_actual_usage())\n", + "print(\"Total usage summary for user_proxy:\", user_proxy.get_total_usage())" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'total_cost': 0.0006090000000000001,\n", + " 'gpt-35-turbo': {'cost': 0.0006090000000000001,\n", + " 'prompt_tokens': 242,\n", + " 'completion_tokens': 123,\n", + " 'total_tokens': 365}}" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_usage_summary, actual_usage_summary = gather_usage_summary([assistant, ai_user_proxy, user_proxy])\n", + "total_usage_summary" + ] } ], "metadata": { @@ -301,7 +517,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.6" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/test/agentchat/test_agent_usage.py b/test/agentchat/test_agent_usage.py new file mode 100644 index 000000000000..d5188cc561b8 --- /dev/null +++ b/test/agentchat/test_agent_usage.py @@ -0,0 +1,139 @@ +from autogen.agent_utils import gather_usage_summary +from autogen import AssistantAgent, UserProxyAgent +from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST +import pytest +from conftest import skip_openai +import autogen +import io +from contextlib import redirect_stdout + +try: + import openai +except ImportError: + skip = True +else: + skip = False or skip_openai + + +@pytest.mark.skipif(skip, reason="openai 
not installed OR requested to skip") +def test_gathering(): + config_list = autogen.config_list_from_json( + OAI_CONFIG_LIST, + file_location=KEY_LOC, + ) + assistant1 = AssistantAgent( + "assistant", + system_message="You are a helpful assistant.", + llm_config={ + "config_list": config_list, + "model": "gpt-3.5-turbo-0613", + }, + ) + assistant2 = AssistantAgent( + "assistant", + system_message="You are a helpful assistant.", + llm_config={ + "config_list": config_list, + "model": "gpt-3.5-turbo-0613", + }, + ) + assistant3 = AssistantAgent( + "assistant", + system_message="You are a helpful assistant.", + llm_config={ + "config_list": config_list, + "model": "gpt-3.5-turbo-0613", + }, + ) + + assistant1.client.total_usage_summary = { + "total_cost": 0.1, + "gpt-35-turbo": {"cost": 0.1, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300}, + } + assistant2.client.total_usage_summary = { + "total_cost": 0.2, + "gpt-35-turbo": {"cost": 0.2, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300}, + } + assistant3.client.total_usage_summary = { + "total_cost": 0.3, + "gpt-4": {"cost": 0.3, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300}, + } + + total_usage, _ = gather_usage_summary([assistant1, assistant2, assistant3]) + + assert round(total_usage["total_cost"], 8) == 0.6 + assert round(total_usage["gpt-35-turbo"]["cost"], 8) == 0.3 + assert round(total_usage["gpt-4"]["cost"], 8) == 0.3 + + # test when agent doesn't have client + user_proxy = UserProxyAgent( + name="ai_user", + human_input_mode="NEVER", + max_consecutive_auto_reply=2, + code_execution_config=False, + default_auto_reply="That's all. 
Thank you.", + ) + + total_usage, acutal_usage = gather_usage_summary([user_proxy]) + + +@pytest.mark.skipif(skip, reason="openai not installed OR requested to skip") +def test_agent_usage(): + config_list = autogen.config_list_from_json( + OAI_CONFIG_LIST, + file_location=KEY_LOC, + ) + assistant = AssistantAgent( + "assistant", + system_message="You are a helpful assistant.", + llm_config={ + "timeout": 600, + "cache_seed": None, + "config_list": config_list, + "model": "gpt-3.5-turbo-0613", + }, + ) + + ai_user_proxy = UserProxyAgent( + name="ai_user", + human_input_mode="NEVER", + max_consecutive_auto_reply=1, + code_execution_config=False, + llm_config={ + "config_list": config_list, + "model": "gpt-3.5-turbo-0613", + }, + # In the system message the "user" always refers to the other agent. + system_message="You ask a user for help. You check the answer from the user and provide feedback.", + ) + + math_problem = "$x^3=125$. What is x?" + ai_user_proxy.initiate_chat( + assistant, + message=math_problem, + ) + + # test print + captured_output = io.StringIO() + with redirect_stdout(captured_output): + ai_user_proxy.print_usage_summary() + output = captured_output.getvalue() + assert "Usage summary excluding cached usage:" in output + + captured_output = io.StringIO() + with redirect_stdout(captured_output): + assistant.print_usage_summary() + output = captured_output.getvalue() + assert "All completions are non-cached:" in output + + # test get + print("Actual usage summary (excluding completion from cache):", assistant.get_actual_usage()) + print("Total usage summary (including completion from cache):", assistant.get_total_usage()) + + print("Actual usage summary (excluding completion from cache):", ai_user_proxy.get_actual_usage()) + print("Total usage summary (including completion from cache):", ai_user_proxy.get_total_usage()) + + +if __name__ == "__main__": + test_gathering() + test_agent_usage() diff --git a/test/test_notebook.py b/test/test_notebook.py 
index 6e8f80b5f8ab..109410dfc0e8 100644 --- a/test/test_notebook.py +++ b/test/test_notebook.py @@ -122,8 +122,8 @@ def test_graph_modelling_language_using_select_speaker(save=False): skip or not sys.version.startswith("3.10"), reason="do not run if openai is not installed or py!=3.10", ) -def test_oai_client_cost(save=False): - run_notebook("oai_client_cost.ipynb", save=save) +def test_agentchat_cost_token_tracking(save=False): + run_notebook("agentchat_cost_token_tracking.ipynb", save=save) if __name__ == "__main__":