From e04dd7b70b0c864ef52dd899ab38a3532a7c5c7d Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Wed, 14 Jun 2023 18:07:26 +0000
Subject: [PATCH 1/5] update openai model support

---
 flaml/autogen/agent/assistant_agent.py    |  1 +
 flaml/autogen/oai/completion.py           | 41 ++++++++++++++++++-----
 setup.py                                  |  4 +--
 website/docs/Use-Cases/Auto-Generation.md |  4 +--
 4 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/flaml/autogen/agent/assistant_agent.py b/flaml/autogen/agent/assistant_agent.py
index 09e0ae0761..0381cfdb97 100644
--- a/flaml/autogen/agent/assistant_agent.py
+++ b/flaml/autogen/agent/assistant_agent.py
@@ -39,6 +39,7 @@ def receive(self, message, sender):
             self._conversations[sender.name] = [{"content": self._system_message, "role": "system"}]
         super().receive(message, sender)
         responses = oai.ChatCompletion.create(messages=self._conversations[sender.name], **self._config)
+        # TODO: handle function_call
         response = oai.ChatCompletion.extract_text(responses)[0]
         self._send(response, sender)
 
diff --git a/flaml/autogen/oai/completion.py b/flaml/autogen/oai/completion.py
index e7b4a50319..cf760d3e49 100644
--- a/flaml/autogen/oai/completion.py
+++ b/flaml/autogen/oai/completion.py
@@ -45,12 +45,16 @@ class Completion(openai_Completion):
     # set of models that support chat completion
     chat_models = {
         "gpt-3.5-turbo",
-        "gpt-3.5-turbo-0301",
+        "gpt-3.5-turbo-0301",  # deprecate in Sep
+        "gpt-3.5-turbo-0613",
+        "gpt-3.5-turbo-16k",
         "gpt-35-turbo",
         "gpt-4",
         "gpt-4-32k",
-        "gpt-4-32k-0314",
-        "gpt-4-0314",
+        "gpt-4-32k-0314",  # deprecate in Sep
+        "gpt-4-0314",  # deprecate in Sep
+        "gpt-4-0613",
+        "gpt-4-32k-0613",
     }
 
     # price per 1k tokens
@@ -62,13 +66,17 @@ class Completion(openai_Completion):
         "code-davinci-002": 0.1,
         "text-davinci-002": 0.02,
         "text-davinci-003": 0.02,
-        "gpt-3.5-turbo": 0.002,
-        "gpt-3.5-turbo-0301": 0.002,
+        "gpt-3.5-turbo": (0.0015, 0.002),
+        "gpt-3.5-turbo-0301": (0.0015, 0.002),  # deprecate in Sep
+        "gpt-3.5-turbo-0613": (0.0015, 0.002),
+        "gpt-3.5-turbo-16k": (0.003, 0.004),
         "gpt-35-turbo": 0.002,
         "gpt-4": (0.03, 0.06),
-        "gpt-4-0314": (0.03, 0.06),
         "gpt-4-32k": (0.06, 0.12),
-        "gpt-4-32k-0314": (0.06, 0.12),
+        "gpt-4-0314": (0.03, 0.06),  # deprecate in Sep
+        "gpt-4-32k-0314": (0.06, 0.12),  # deprecate in Sep
+        "gpt-4-0613": (0.03, 0.06),
+        "gpt-4-32k-0613": (0.06, 0.12),
     }
 
     default_search_space = {
@@ -386,7 +394,7 @@ def _eval(cls, config: dict, prune=True, eval_only=False):
             result["cost"] = cost
             return result
         # evaluate the quality of the responses
-        responses = cls.extract_text(response)
+        responses = cls.extract_text_or_function_call(response)
         usage = response["usage"]
         n_input_tokens = usage["prompt_tokens"]
         n_output_tokens = usage.get("completion_tokens", 0)
@@ -898,7 +906,7 @@ def eval_func(responses, **data):
             response = cls.create(data_i, use_cache, **config)
             cost += response["cost"]
             # evaluate the quality of the responses
-            responses = cls.extract_text(response)
+            responses = cls.extract_text_or_function_call(response)
             if eval_func is not None:
                 metrics = eval_func(responses, **data_i)
             elif hasattr(cls, "_eval_func"):
@@ -991,6 +999,21 @@ def extract_text(cls, response: dict) -> List[str]:
             return [choice["text"] for choice in choices]
         return [choice["message"].get("content", "") for choice in choices]
 
+    @classmethod
+    def extract_text_or_function_call(cls, response: dict) -> List[str]:
+        """Extract the text or function calls from a completion or chat response.
+
+        Args:
+            response (dict): The response from OpenAI API.
+
+        Returns:
+            A list of function calls in the responses.
+        """
+        choices = response["choices"]
+        if "text" in choices[0]:
+            return [choice["text"] for choice in choices]
+        return [choice["message"].get("content") or choice["message"].get("function_call", "") for choice in choices]
+
     @classmethod
     @property
     def logged_history(cls) -> Dict:
diff --git a/setup.py b/setup.py
index 33c8604775..c29272b385 100644
--- a/setup.py
+++ b/setup.py
@@ -127,8 +127,8 @@
         "pytorch-forecasting>=0.9.0",
     ],
     "benchmark": ["catboost>=0.26", "psutil==5.8.0", "xgboost==1.3.3", "pandas==1.1.4"],
-    "openai": ["openai==0.27.4", "diskcache"],
-    "autogen": ["openai==0.27.4", "diskcache", "docker"],
+    "openai": ["openai==0.27.8", "diskcache"],
+    "autogen": ["openai==0.27.8", "diskcache", "docker"],
     "synapse": [
         "joblibspark>=0.5.0",
         "optuna==2.8.0",
diff --git a/website/docs/Use-Cases/Auto-Generation.md b/website/docs/Use-Cases/Auto-Generation.md
index 3aad9242e9..5b57b5c839 100644
--- a/website/docs/Use-Cases/Auto-Generation.md
+++ b/website/docs/Use-Cases/Auto-Generation.md
@@ -368,14 +368,14 @@ Set `compact=False` in `start_logging()` to switch.
     },
 }
 ```
-It can be seen that the individual API call history contain redundant information of the conversation. For a long conversation the degree of redundancy is high.
+It can be seen that the individual API call history contains redundant information of the conversation. For a long conversation the degree of redundancy is high.
 The compact history is more efficient and the individual API call history contains more details.
 
 ### Other Utilities
 
 - a [`cost`](../reference/autogen/oai/completion#cost) function to calculate the cost of an API call.
 - a [`test`](../reference/autogen/oai/completion#test) function to conveniently evaluate the configuration over test data.
-- a [`extract_text`](../reference/autogen/oai/completion#extract_text) function to extract the text from a completion or chat response.
+- an [`extract_text_or_function_call`](../reference/autogen/oai/completion#extract_text_or_function_call) function to extract the text or function call from a completion or chat response.
 
 ## Agents (Experimental)
 

From 6256a78d88bd402a66b8d69f1fd21f91b25cf5f9 Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Wed, 14 Jun 2023 22:42:45 +0000
Subject: [PATCH 2/5] new gpt3.5

---
 test/autogen/test_assistant_agent.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/autogen/test_assistant_agent.py b/test/autogen/test_assistant_agent.py
index c5230930b3..46bacb8849 100644
--- a/test/autogen/test_assistant_agent.py
+++ b/test/autogen/test_assistant_agent.py
@@ -11,10 +11,10 @@ def test_gpt35(human_input_mode="NEVER", max_consecutive_auto_reply=5):
         import openai
     except ImportError:
         return
-    config_list = oai.config_list_from_models(key_file_path=KEY_LOC, model_list=["gpt-3.5-turbo"])
+    config_list = oai.config_list_from_models(key_file_path=KEY_LOC, model_list=["gpt-3.5-turbo-0613"])
     assistant = AssistantAgent(
         "coding_agent",
-        request_timeout=600,
+        # request_timeout=600,
         seed=40,
         max_tokens=1024,
         config_list=config_list,

From 2199998b0d058b2e757cc817c9ef38f7d71a99cc Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Thu, 15 Jun 2023 02:59:39 +0000
Subject: [PATCH 3/5] docstr

---
 flaml/autogen/oai/completion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flaml/autogen/oai/completion.py b/flaml/autogen/oai/completion.py
index cf760d3e49..a7e10403a7 100644
--- a/flaml/autogen/oai/completion.py
+++ b/flaml/autogen/oai/completion.py
@@ -1007,7 +1007,7 @@ def extract_text_or_function_call(cls, response: dict) -> List[str]:
             response (dict): The response from OpenAI API.
 
         Returns:
-            A list of function calls in the responses.
+            A list of text or function calls in the responses.
         """
         choices = response["choices"]
         if "text" in choices[0]:

From 1b6dcbd9648a7e9a61345358f09caf6f3cced938 Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Thu, 15 Jun 2023 03:36:49 +0000
Subject: [PATCH 4/5] function_call and content may co-exist

---
 flaml/autogen/oai/completion.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/flaml/autogen/oai/completion.py b/flaml/autogen/oai/completion.py
index a7e10403a7..d437559823 100644
--- a/flaml/autogen/oai/completion.py
+++ b/flaml/autogen/oai/completion.py
@@ -1012,7 +1012,10 @@ def extract_text_or_function_call(cls, response: dict) -> List[str]:
         choices = response["choices"]
         if "text" in choices[0]:
             return [choice["text"] for choice in choices]
-        return [choice["message"].get("content") or choice["message"].get("function_call", "") for choice in choices]
+        return [
+            choice["message"] if "function_call" in choice["message"] else choice["message"].get("content", "")
+            for choice in choices
+        ]
 
     @classmethod
     @property

From f68afe73108d518e9318691afd06b8b56caf707b Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Thu, 15 Jun 2023 22:30:25 +0000
Subject: [PATCH 5/5] test function call

---
 test/autogen/test_function_call.py | 63 ++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 test/autogen/test_function_call.py

diff --git a/test/autogen/test_function_call.py b/test/autogen/test_function_call.py
new file mode 100644
index 0000000000..9a86ae9414
--- /dev/null
+++ b/test/autogen/test_function_call.py
@@ -0,0 +1,63 @@
+try:
+    import openai
+except ImportError:
+    openai = None
+import pytest
+import json
+from flaml import oai
+from flaml.autogen.math_utils import eval_math_responses
+
+KEY_LOC = "test/autogen"
+
+
+@pytest.mark.skipif(openai is None, reason="openai not installed")
+def test_eval_math_responses():
+    config_list = oai.config_list_openai_aoai(KEY_LOC, exclude="aoai")
+    functions = [
+        {
+            "name": "eval_math_responses",
+            "description": "Select a response for a math problem using voting, and check if the response is correct if the solution is provided",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "responses": {
+                        "type": "string",
+                        "description": "The responses in a list",
+                    },
+                    "solution": {
+                        "type": "string",
+                        "description": "The canonical solution",
+                    },
+                },
+                "required": ["responses"],
+            },
+        },
+    ]
+    response = oai.ChatCompletion.create(
+        model="gpt-3.5-turbo-0613",
+        config_list=config_list,
+        messages=[
+            {
+                "role": "user",
+                "content": 'evaluate the math responses ["1", "5/2", "5/2"] against the true answer \\frac{5}{2}',
+            },
+        ],
+        functions=functions,
+    )
+    print(response)
+    responses = oai.ChatCompletion.extract_text_or_function_call(response)
+    print(responses[0])
+    function_call = responses[0]["function_call"]
+    name, arguments = function_call["name"], json.loads(function_call["arguments"])
+    assert name == "eval_math_responses"
+    print(arguments["responses"])
+    if isinstance(arguments["responses"], str):
+        arguments["responses"] = json.loads(arguments["responses"])
+    arguments["responses"] = [f"\\boxed{{{x}}}" for x in arguments["responses"]]
+    print(arguments["responses"])
+    arguments["solution"] = f"\\boxed{{{arguments['solution']}}}"
+    print(eval_math_responses(**arguments))
+
+
+if __name__ == "__main__":
+    test_eval_math_responses()
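
A note on consuming the return value of `extract_text_or_function_call`: after PATCH 4/5 it yields plain strings for ordinary replies but whole message dicts when a `function_call` is present, so callers have to branch on the element type. Below is a minimal sketch of such a caller; it is not part of the patch series, and the `sample_response` dict is hand-built for illustration (real responses come from `oai.ChatCompletion.create`, as in the test above).

```python
import json

# Hand-built stand-in for an OpenAI chat response whose first choice is a
# function call; field names follow the function_call schema exercised by
# test_eval_math_responses above.
sample_response = {
    "choices": [
        {
            "message": {
                "role": "assistant",
                "content": None,
                "function_call": {
                    "name": "eval_math_responses",
                    "arguments": json.dumps({"responses": '["1", "5/2", "5/2"]'}),
                },
            }
        }
    ]
}

# The extraction logic as of PATCH 4/5: return the whole message dict when a
# function_call is present, otherwise the text content.
extracted = [
    choice["message"] if "function_call" in choice["message"] else choice["message"].get("content", "")
    for choice in sample_response["choices"]
]

# Dispatch on the two possible shapes.
for item in extracted:
    if isinstance(item, dict) and "function_call" in item:
        call = item["function_call"]
        name, arguments = call["name"], json.loads(call["arguments"])
        print(f"function call: {name}, arguments: {arguments}")
    else:
        print(f"text reply: {item}")
```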
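The switch in PATCH 1/5 from scalar `price1K` entries to `(prompt_price, completion_price)` tuples means cost accounting must distinguish input from output tokens. The sketch below illustrates the arithmetic under the usual per-1k-token convention; `cost_of` is a hypothetical helper for illustration, not FLAML's API (FLAML exposes this through its `cost` function).

```python
# Values copied from the patched price1K table; prices are USD per 1k tokens.
price1K = {
    "gpt-3.5-turbo": (0.0015, 0.002),    # (prompt, completion) prices
    "gpt-3.5-turbo-16k": (0.003, 0.004),
    "gpt-4": (0.03, 0.06),
    "text-davinci-003": 0.02,            # legacy scalar: one price for all tokens
}

def cost_of(model: str, n_input_tokens: int, n_output_tokens: int) -> float:
    """Hypothetical helper: price a call under the tuple-or-scalar scheme."""
    price = price1K[model]
    if isinstance(price, tuple):
        # Separate prompt/completion pricing introduced in this patch series.
        return (price[0] * n_input_tokens + price[1] * n_output_tokens) / 1000
    return price * (n_input_tokens + n_output_tokens) / 1000

# 1k prompt tokens + 1k completion tokens on gpt-3.5-turbo: $0.0015 + $0.002.
assert abs(cost_of("gpt-3.5-turbo", 1000, 1000) - 0.0035) < 1e-12
```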