From d3f02487ad44dd82cc7267a73682cdf6b282091f Mon Sep 17 00:00:00 2001 From: kevin666aa Date: Sat, 25 Nov 2023 21:04:06 -0500 Subject: [PATCH 1/5] add cost calculation --- autogen/oai/client.py | 19 ++++++++++++++++--- autogen/oai/openai_utils.py | 30 ++++++++++++++++++++++++++++++ test/oai/test_client.py | 13 +++++++++++++ 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/autogen/oai/client.py b/autogen/oai/client.py index ee59f06e8095..ec7ac5b4d772 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -7,7 +7,7 @@ import inspect from flaml.automl.logger import logger_formatter -from autogen.oai.openai_utils import get_key +from autogen.oai.openai_utils import get_key, oai_price1k from autogen.token_count_utils import count_token try: @@ -240,7 +240,7 @@ def yes_or_no_filter(context, response): # Return the response if it passes the filter or it is the last client response.config_id = i response.pass_filter = pass_filter - # TODO: add response.cost + response.cost = self.cost(response) return response continue # filter is not passed; try the next config try: @@ -261,10 +261,23 @@ def yes_or_no_filter(context, response): # Return the response if it passes the filter or it is the last client response.config_id = i response.pass_filter = pass_filter - # TODO: add response.cost + response.cost = self.cost(response) return response continue # filter is not passed; try the next config + def cost(self, response): + """Calculate the cost of the response.""" + model = response.model + if model not in oai_price1k: + return 0 + + n_input_tokens = response.usage.prompt_tokens + n_output_tokens = response.usage.completion_tokens + tmp_price1K = oai_price1k[model] + if isinstance(tmp_price1K, tuple): + return (tmp_price1K[0] * n_input_tokens + tmp_price1K[1] * n_output_tokens) / 1000 + return tmp_price1K * (n_input_tokens + n_output_tokens) / 1000 + def _completions_create(self, client, params): completions = client.chat.completions if "messages" in params else client.completions # If streaming is enabled, has messages, and does not have functions, then diff --git a/autogen/oai/openai_utils.py b/autogen/oai/openai_utils.py index 637e874055a3..966646cdb802 100644 --- a/autogen/oai/openai_utils.py +++ b/autogen/oai/openai_utils.py @@ -9,6 +9,36 @@ NON_CACHE_KEY = ["api_key", "base_url", "api_type", "api_version"] +oai_price1k = { + "text-ada-001": 0.0004, + "text-babbage-001": 0.0005, + "text-curie-001": 0.002, + "code-cushman-001": 0.024, + "code-davinci-002": 0.1, + "text-davinci-002": 0.02, + "text-davinci-003": 0.02, + "gpt-3.5-turbo-instruct": (0.0015, 0.002), + "gpt-3.5-turbo-0301": (0.0015, 0.002), # deprecate in Sep + "gpt-3.5-turbo-0613": (0.0015, 0.002), + "gpt-3.5-turbo-16k": (0.003, 0.004), + "gpt-3.5-turbo-16k-0613": (0.003, 0.004), + "gpt-35-turbo": (0.0015, 0.002), + "gpt-35-turbo-16k": (0.003, 0.004), + "gpt-35-turbo-instruct": (0.0015, 0.002), + "gpt-4": (0.03, 0.06), + "gpt-4-32k": (0.06, 0.12), + "gpt-4-0314": (0.03, 0.06), # deprecate in Sep + "gpt-4-32k-0314": (0.06, 0.12), # deprecate in Sep + "gpt-4-0613": (0.03, 0.06), + "gpt-4-32k-0613": (0.06, 0.12), + # 11-06 + "gpt-3.5-turbo": (0.001, 0.002), + "gpt-3.5-turbo-1106": (0.001, 0.002), + "gpt-35-turbo-1106": (0.001, 0.002), + "gpt-4-1106-preview": (0.01, 0.03), + "gpt-4-1106-vision-preview": (0.01, 0.03), # TODO: support vision pricing of images +} + def get_key(config): """Get a unique identifier of a configuration. diff --git a/test/oai/test_client.py b/test/oai/test_client.py index 83e01bceddf5..6c637efd13c7 100644 --- a/test/oai/test_client.py +++ b/test/oai/test_client.py @@ -48,7 +48,20 @@ def test_completion(): print(client.extract_text_or_function_call(response)) +@pytest.mark.skipif(skip, reason="openai>=1 not installed") +def test_cost(): + config_list = config_list_openai_aoai(KEY_LOC) + client = OpenAIWrapper(config_list=config_list, cache_seed=None) + response = client.create(prompt="1+3=", model="gpt-3.5-turbo-instruct") + print(response.cost) + + client = OpenAIWrapper(config_list=config_list, cache_seed=42) + response = client.create(prompt="1+3=", model="gpt-3.5-turbo-instruct") + print(response.cost) + + if __name__ == "__main__": test_aoai_chat_completion() test_chat_completion() test_completion() + test_cost() From 60d2641ad89a5ce7026327157b9e64fd188bd3be Mon Sep 17 00:00:00 2001 From: Yiran Wu <32823396+kevin666aa@users.noreply.github.com> Date: Sun, 26 Nov 2023 23:31:06 -0500 Subject: [PATCH 2/5] Update autogen/oai/client.py Co-authored-by: Joshua Kim --- autogen/oai/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autogen/oai/client.py b/autogen/oai/client.py index ec7ac5b4d772..79f04c412add 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -265,7 +265,7 @@ def yes_or_no_filter(context, response): return response continue # filter is not passed; try the next config - def cost(self, response): + def cost(self, response: Union[ChatCompletion, Completion]) -> float: """Calculate the cost of the response.""" model = response.model if model not in oai_price1k: From 02bf64df144d286aeab1988e3fa9ce77ae457320 Mon Sep 17 00:00:00 2001 From: Yiran Wu <32823396+kevin666aa@users.noreply.github.com> Date: Sun, 26 Nov 2023 23:31:59 -0500 Subject: [PATCH 3/5] Update autogen/oai/client.py Co-authored-by: Joshua Kim --- autogen/oai/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 79f04c412add..040fb1dc1293 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -274,6 +274,7 @@ def cost(self, response: Union[ChatCompletion, Completion]) -> float: n_input_tokens = response.usage.prompt_tokens n_output_tokens = response.usage.completion_tokens tmp_price1K = oai_price1k[model] + # First value is input token rate, second value is output token rate if isinstance(tmp_price1K, tuple): return (tmp_price1K[0] * n_input_tokens + tmp_price1K[1] * n_output_tokens) / 1000 return tmp_price1K * (n_input_tokens + n_output_tokens) / 1000 From 2a4bea49d615585e1ea16202ce737f27239d874b Mon Sep 17 00:00:00 2001 From: kevin666aa Date: Sun, 26 Nov 2023 23:46:41 -0500 Subject: [PATCH 4/5] update --- autogen/oai/client.py | 3 ++- test/oai/test_client.py | 18 +++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 040fb1dc1293..b4a139401ad2 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -2,7 +2,7 @@ import os import sys -from typing import List, Optional, Dict, Callable +from typing import List, Optional, Dict, Callable, Union import logging import inspect from flaml.automl.logger import logger_formatter @@ -269,6 +269,7 @@ def cost(self, response: Union[ChatCompletion, Completion]) -> float: """Calculate the cost of the response.""" model = response.model if model not in oai_price1k: + # TODO: add logging to warn that the model is not found return 0 n_input_tokens = response.usage.prompt_tokens diff --git a/test/oai/test_client.py b/test/oai/test_client.py index 6c637efd13c7..3f881fce393f 100644 --- a/test/oai/test_client.py +++ b/test/oai/test_client.py @@ -49,14 +49,18 @@ def test_completion(): @pytest.mark.skipif(skip, reason="openai>=1 not installed") -def test_cost(): +@pytest.mark.parametrize( + "cache_seed, model", + [ + (None, "gpt-3.5-turbo-instruct"), + (42, "gpt-3.5-turbo-instruct"), + (None, "text-ada-001"), + ], +) +def test_cost(cache_seed, model): config_list = config_list_openai_aoai(KEY_LOC) - client = OpenAIWrapper(config_list=config_list, cache_seed=None) - response = client.create(prompt="1+3=", model="gpt-3.5-turbo-instruct") - print(response.cost) - - client = OpenAIWrapper(config_list=config_list, cache_seed=42) - response = client.create(prompt="1+3=", model="gpt-3.5-turbo-instruct") + client = OpenAIWrapper(config_list=config_list, cache_seed=cache_seed) + response = client.create(prompt="1+3=", model=model) print(response.cost) From 66ba07e7b9c2c8bbd343858278fbbfccc02b800e Mon Sep 17 00:00:00 2001 From: kevin666aa Date: Mon, 27 Nov 2023 20:34:26 -0500 Subject: [PATCH 5/5] add doc --- website/docs/Use-Cases/enhanced_inference.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/website/docs/Use-Cases/enhanced_inference.md b/website/docs/Use-Cases/enhanced_inference.md index ffd4fd60f789..855168e99619 100644 --- a/website/docs/Use-Cases/enhanced_inference.md +++ b/website/docs/Use-Cases/enhanced_inference.md @@ -122,6 +122,8 @@ client = OpenAIWrapper() response = client.create(messages=[{"role": "user", "content": "2+2="}], model="gpt-3.5-turbo") # extract the response text print(client.extract_text_or_function_call(response)) +# get cost of this completion +print(response.cost) # Azure OpenAI endpoint client = OpenAIWrapper(api_key=..., base_url=..., api_version=..., api_type="azure") # Completion