From e04dd7b70b0c864ef52dd899ab38a3532a7c5c7d Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Wed, 14 Jun 2023 18:07:26 +0000
Subject: [PATCH 1/5] update openai model support

---
 flaml/autogen/agent/assistant_agent.py    |  1 +
 flaml/autogen/oai/completion.py           | 41 ++++++++++++++++++-----
 setup.py                                  |  4 +--
 website/docs/Use-Cases/Auto-Generation.md |  4 +--
 4 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/flaml/autogen/agent/assistant_agent.py b/flaml/autogen/agent/assistant_agent.py
index 09e0ae0761..0381cfdb97 100644
--- a/flaml/autogen/agent/assistant_agent.py
+++ b/flaml/autogen/agent/assistant_agent.py
@@ -39,6 +39,7 @@ def receive(self, message, sender):
             self._conversations[sender.name] = [{"content": self._system_message, "role": "system"}]
         super().receive(message, sender)
         responses = oai.ChatCompletion.create(messages=self._conversations[sender.name], **self._config)
+        # TODO: handle function_call
         response = oai.ChatCompletion.extract_text(responses)[0]
         self._send(response, sender)
 
diff --git a/flaml/autogen/oai/completion.py b/flaml/autogen/oai/completion.py
index e7b4a50319..cf760d3e49 100644
--- a/flaml/autogen/oai/completion.py
+++ b/flaml/autogen/oai/completion.py
@@ -45,12 +45,16 @@ class Completion(openai_Completion):
     # set of models that support chat completion
     chat_models = {
         "gpt-3.5-turbo",
-        "gpt-3.5-turbo-0301",
+        "gpt-3.5-turbo-0301",  # deprecate in Sep
+        "gpt-3.5-turbo-0613",
+        "gpt-3.5-turbo-16k",
         "gpt-35-turbo",
         "gpt-4",
         "gpt-4-32k",
-        "gpt-4-32k-0314",
-        "gpt-4-0314",
+        "gpt-4-32k-0314",  # deprecate in Sep
+        "gpt-4-0314",  # deprecate in Sep
+        "gpt-4-0613",
+        "gpt-4-32k-0613",
     }
 
     # price per 1k tokens
@@ -62,13 +66,17 @@ class Completion(openai_Completion):
         "code-davinci-002": 0.1,
         "text-davinci-002": 0.02,
         "text-davinci-003": 0.02,
-        "gpt-3.5-turbo": 0.002,
-        "gpt-3.5-turbo-0301": 0.002,
+        "gpt-3.5-turbo": (0.0015, 0.002),
+        "gpt-3.5-turbo-0301": (0.0015, 0.002),  # deprecate in Sep
+        "gpt-3.5-turbo-0613": (0.0015, 0.002),
+        "gpt-3.5-turbo-16k": (0.003, 0.004),
         "gpt-35-turbo": 0.002,
         "gpt-4": (0.03, 0.06),
-        "gpt-4-0314": (0.03, 0.06),
         "gpt-4-32k": (0.06, 0.12),
-        "gpt-4-32k-0314": (0.06, 0.12),
+        "gpt-4-0314": (0.03, 0.06),  # deprecate in Sep
+        "gpt-4-32k-0314": (0.06, 0.12),  # deprecate in Sep
+        "gpt-4-0613": (0.03, 0.06),
+        "gpt-4-32k-0613": (0.06, 0.12),
     }
 
     default_search_space = {
@@ -386,7 +394,7 @@ def _eval(cls, config: dict, prune=True, eval_only=False):
             result["cost"] = cost
             return result
         # evaluate the quality of the responses
-        responses = cls.extract_text(response)
+        responses = cls.extract_text_or_function_call(response)
         usage = response["usage"]
         n_input_tokens = usage["prompt_tokens"]
         n_output_tokens = usage.get("completion_tokens", 0)
@@ -898,7 +906,7 @@ def eval_func(responses, **data):
             response = cls.create(data_i, use_cache, **config)
             cost += response["cost"]
             # evaluate the quality of the responses
-            responses = cls.extract_text(response)
+            responses = cls.extract_text_or_function_call(response)
             if eval_func is not None:
                 metrics = eval_func(responses, **data_i)
             elif hasattr(cls, "_eval_func"):
@@ -991,6 +999,21 @@ def extract_text(cls, response: dict) -> List[str]:
             return [choice["text"] for choice in choices]
         return [choice["message"].get("content", "") for choice in choices]
 
+    @classmethod
+    def extract_text_or_function_call(cls, response: dict) -> List[str]:
+        """Extract the text or function calls from a completion or chat response.
+
+        Args:
+            response (dict): The response from OpenAI API.
+
+        Returns:
+            A list of function calls in the responses.
+        """
+        choices = response["choices"]
+        if "text" in choices[0]:
+            return [choice["text"] for choice in choices]
+        return [choice["message"].get("content") or choice["message"].get("function_call", "") for choice in choices]
+
     @classmethod
     @property
     def logged_history(cls) -> Dict:
diff --git a/setup.py b/setup.py
index 33c8604775..c29272b385 100644
--- a/setup.py
+++ b/setup.py
@@ -127,8 +127,8 @@
         "pytorch-forecasting>=0.9.0",
     ],
     "benchmark": ["catboost>=0.26", "psutil==5.8.0", "xgboost==1.3.3", "pandas==1.1.4"],
-    "openai": ["openai==0.27.4", "diskcache"],
-    "autogen": ["openai==0.27.4", "diskcache", "docker"],
+    "openai": ["openai==0.27.8", "diskcache"],
+    "autogen": ["openai==0.27.8", "diskcache", "docker"],
     "synapse": [
         "joblibspark>=0.5.0",
         "optuna==2.8.0",
diff --git a/website/docs/Use-Cases/Auto-Generation.md b/website/docs/Use-Cases/Auto-Generation.md
index 3aad9242e9..5b57b5c839 100644
--- a/website/docs/Use-Cases/Auto-Generation.md
+++ b/website/docs/Use-Cases/Auto-Generation.md
@@ -368,14 +368,14 @@ Set `compact=False` in `start_logging()` to switch.
     },
 }
 ```
-It can be seen that the individual API call history contain redundant information of the conversation. For a long conversation the degree of redundancy is high.
+It can be seen that the individual API call history contains redundant information of the conversation. For a long conversation the degree of redundancy is high.
 The compact history is more efficient and the individual API call history contains more details.
 
 ### Other Utilities
 
 - a [`cost`](../reference/autogen/oai/completion#cost) function to calculate the cost of an API call.
 - a [`test`](../reference/autogen/oai/completion#test) function to conveniently evaluate the configuration over test data.
-- a [`extract_text`](../reference/autogen/oai/completion#extract_text) function to extract the text from a completion or chat response.
+- an [`extract_text_or_function_call`](../reference/autogen/oai/completion#extract_text_or_function_call) function to extract the text or function call from a completion or chat response.
 
 ## Agents (Experimental)
 

From 6256a78d88bd402a66b8d69f1fd21f91b25cf5f9 Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Wed, 14 Jun 2023 22:42:45 +0000
Subject: [PATCH 2/5] new gpt3.5

---
 test/autogen/test_assistant_agent.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/autogen/test_assistant_agent.py b/test/autogen/test_assistant_agent.py
index c5230930b3..46bacb8849 100644
--- a/test/autogen/test_assistant_agent.py
+++ b/test/autogen/test_assistant_agent.py
@@ -11,10 +11,10 @@ def test_gpt35(human_input_mode="NEVER", max_consecutive_auto_reply=5):
         import openai
     except ImportError:
         return
-    config_list = oai.config_list_from_models(key_file_path=KEY_LOC, model_list=["gpt-3.5-turbo"])
+    config_list = oai.config_list_from_models(key_file_path=KEY_LOC, model_list=["gpt-3.5-turbo-0613"])
     assistant = AssistantAgent(
         "coding_agent",
-        request_timeout=600,
+        # request_timeout=600,
         seed=40,
         max_tokens=1024,
         config_list=config_list,

From 2199998b0d058b2e757cc817c9ef38f7d71a99cc Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Thu, 15 Jun 2023 02:59:39 +0000
Subject: [PATCH 3/5] docstr

---
 flaml/autogen/oai/completion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flaml/autogen/oai/completion.py b/flaml/autogen/oai/completion.py
index cf760d3e49..a7e10403a7 100644
--- a/flaml/autogen/oai/completion.py
+++ b/flaml/autogen/oai/completion.py
@@ -1007,7 +1007,7 @@ def extract_text_or_function_call(cls, response: dict) -> List[str]:
             response (dict): The response from OpenAI API.
 
         Returns:
-            A list of function calls in the responses.
+            A list of text or function calls in the responses.
         """
         choices = response["choices"]
         if "text" in choices[0]:

From 1b6dcbd9648a7e9a61345358f09caf6f3cced938 Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Thu, 15 Jun 2023 03:36:49 +0000
Subject: [PATCH 4/5] function_call and content may co-exist

---
 flaml/autogen/oai/completion.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/flaml/autogen/oai/completion.py b/flaml/autogen/oai/completion.py
index a7e10403a7..d437559823 100644
--- a/flaml/autogen/oai/completion.py
+++ b/flaml/autogen/oai/completion.py
@@ -1012,7 +1012,10 @@ def extract_text_or_function_call(cls, response: dict) -> List[str]:
         choices = response["choices"]
         if "text" in choices[0]:
             return [choice["text"] for choice in choices]
-        return [choice["message"].get("content") or choice["message"].get("function_call", "") for choice in choices]
+        return [
+            choice["message"] if "function_call" in choice["message"] else choice["message"].get("content", "")
+            for choice in choices
+        ]
 
     @classmethod
     @property

From f68afe73108d518e9318691afd06b8b56caf707b Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Thu, 15 Jun 2023 22:30:25 +0000
Subject: [PATCH 5/5] test function call

---
 test/autogen/test_function_call.py | 63 ++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 test/autogen/test_function_call.py

diff --git a/test/autogen/test_function_call.py b/test/autogen/test_function_call.py
new file mode 100644
index 0000000000..9a86ae9414
--- /dev/null
+++ b/test/autogen/test_function_call.py
@@ -0,0 +1,63 @@
+try:
+    import openai
+except ImportError:
+    openai = None
+import pytest
+import json
+from flaml import oai
+from flaml.autogen.math_utils import eval_math_responses
+
+KEY_LOC = "test/autogen"
+
+
+@pytest.mark.skipif(openai is None, reason="openai not installed")
+def test_eval_math_responses():
+    config_list = oai.config_list_openai_aoai(KEY_LOC, exclude="aoai")
+    functions = [
+        {
+            "name": "eval_math_responses",
+            "description": "Select a response for a math problem using voting, and check if the response is correct if the solution is provided",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "responses": {
+                        "type": "string",
+                        "description": "The responses in a list",
+                    },
+                    "solution": {
+                        "type": "string",
+                        "description": "The canonical solution",
+                    },
+                },
+                "required": ["responses"],
+            },
+        },
+    ]
+    response = oai.ChatCompletion.create(
+        model="gpt-3.5-turbo-0613",
+        config_list=config_list,
+        messages=[
+            {
+                "role": "user",
+                "content": 'evaluate the math responses ["1", "5/2", "5/2"] against the true answer \\frac{5}{2}',
+            },
+        ],
+        functions=functions,
+    )
+    print(response)
+    responses = oai.ChatCompletion.extract_text_or_function_call(response)
+    print(responses[0])
+    function_call = responses[0]["function_call"]
+    name, arguments = function_call["name"], json.loads(function_call["arguments"])
+    assert name == "eval_math_responses"
+    print(arguments["responses"])
+    if isinstance(arguments["responses"], str):
+        arguments["responses"] = json.loads(arguments["responses"])
+    arguments["responses"] = [f"\\boxed{{{x}}}" for x in arguments["responses"]]
+    print(arguments["responses"])
+    arguments["solution"] = f"\\boxed{{{arguments['solution']}}}"
+    print(eval_math_responses(**arguments))
+
+
+if __name__ == "__main__":
+    test_eval_math_responses()
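
A note on consuming the return value of `extract_text_or_function_call`: after PATCH 4/5 it yields plain strings for ordinary replies but whole message dicts when a `function_call` is present, so callers have to branch on the element type. Below is a minimal sketch of such a caller; it is not part of the patch series, and the `sample_response` dict is hand-built for illustration (real responses come from `oai.ChatCompletion.create`, as in the test above).

```python
import json

# Hand-built stand-in for an OpenAI chat response whose first choice is a
# function call; field names follow the function_call schema exercised by
# test_eval_math_responses above.
sample_response = {
    "choices": [
        {
            "message": {
                "role": "assistant",
                "content": None,
                "function_call": {
                    "name": "eval_math_responses",
                    "arguments": json.dumps({"responses": '["1", "5/2", "5/2"]'}),
                },
            }
        }
    ]
}

# The extraction logic as of PATCH 4/5: return the whole message dict when a
# function_call is present, otherwise the text content.
extracted = [
    choice["message"] if "function_call" in choice["message"] else choice["message"].get("content", "")
    for choice in sample_response["choices"]
]

# Dispatch on the two possible shapes.
for item in extracted:
    if isinstance(item, dict) and "function_call" in item:
        call = item["function_call"]
        name, arguments = call["name"], json.loads(call["arguments"])
        print(f"function call: {name}, arguments: {arguments}")
    else:
        print(f"text reply: {item}")
```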
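The switch in PATCH 1/5 from scalar `price1K` entries to `(prompt_price, completion_price)` tuples means cost accounting must distinguish input from output tokens. The sketch below illustrates the arithmetic under the usual per-1k-token convention; `cost_of` is a hypothetical helper for illustration, not FLAML's API (FLAML exposes this through its `cost` function).

```python
# Values copied from the patched price1K table; prices are USD per 1k tokens.
price1K = {
    "gpt-3.5-turbo": (0.0015, 0.002),    # (prompt, completion) prices
    "gpt-3.5-turbo-16k": (0.003, 0.004),
    "gpt-4": (0.03, 0.06),
    "text-davinci-003": 0.02,            # legacy scalar: one price for all tokens
}

def cost_of(model: str, n_input_tokens: int, n_output_tokens: int) -> float:
    """Hypothetical helper: price a call under the tuple-or-scalar scheme."""
    price = price1K[model]
    if isinstance(price, tuple):
        # Separate prompt/completion pricing introduced in this patch series.
        return (price[0] * n_input_tokens + price[1] * n_output_tokens) / 1000
    return price * (n_input_tokens + n_output_tokens) / 1000

# 1k prompt tokens + 1k completion tokens on gpt-3.5-turbo: $0.0015 + $0.002.
assert abs(cost_of("gpt-3.5-turbo", 1000, 1000) - 0.0035) < 1e-12
```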