@classmethod
def print_usage_summary(cls) -> None:
    """Print a usage summary: total cost and per-model token counts.

    Reads ``cls._history_dict`` (populated by ``start_logging``) in either
    compact or full form, aggregates prompt/completion/total token counts per
    model, and prints the result. If no history has been logged yet, prints a
    notice and returns without touching the (``None``) history.
    """
    if cls._history_dict is None:
        print("No usage summary available.", flush=True)
        # Nothing logged yet -- return early instead of crashing on None below.
        return

    token_count_summary = defaultdict(lambda: {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})

    if not cls._history_compact:
        # Full history: each entry is a {"request": ..., "response": ...} pair.
        source = cls._history_dict.values()
        total_cost = sum(msg_pair["response"]["cost"] for msg_pair in source)
    else:
        # Compact history: each value holds parallel lists ("cost", "token_count").
        total_cost = sum(sum(value_list["cost"]) for value_list in cls._history_dict.values())
        source = (
            token_data for value_list in cls._history_dict.values() for token_data in value_list["token_count"]
        )

    for entry in source:
        if not cls._history_compact:
            model = entry["response"]["model"]
            token_data = entry["response"]["usage"]
        else:
            model = entry["model"]
            token_data = entry

        token_count_summary[model]["prompt_tokens"] += token_data["prompt_tokens"]
        token_count_summary[model]["completion_tokens"] += token_data["completion_tokens"]
        token_count_summary[model]["total_tokens"] += token_data["total_tokens"]

    print(f"Total cost: {total_cost}", flush=True)
    for model, counts in token_count_summary.items():
        print(
            f"Token count summary for model {model}: prompt_tokens: {counts['prompt_tokens']}, completion_tokens: {counts['completion_tokens']}, total_tokens: {counts['total_tokens']}",
            flush=True,
        )
They can be retrieve ```python autogen.ChatCompletion.logged_history ``` +There is a function that can be used to print usage summary (total cost, and token count usage from each model): +```python +autogen.ChatCompletion.print_usage_summary() +``` To stop logging, use ```python autogen.ChatCompletion.stop_logging() @@ -366,5 +370,13 @@ Set `compact=False` in `start_logging()` to switch. }, } ``` + +* Example of printing for usage summary +``` +Total cost: +Token count summary for model : prompt_tokens: , completion_tokens: , total_tokens: +``` + + It can be seen that the individual API call history contains redundant information of the conversation. For a long conversation the degree of redundancy is high. The compact history is more efficient and the individual API call history contains more details.