Disable Azure tests and fix open ai tests (#3587)

ekzhu · web-flow · commit db287183f2ec · 2024-10-01T21:53:25.000Z
* Disable Azure tests

* fix calculator notebook

* use gpt-4o-mini for tests

* use gpt-4o

* use gpt-4o

* fix formatting

* Fix models used in contrib tests

* Fix retrieval test

* WIP

* Skip

* Fix format

* Fix formatting
diff --git a/autogen/agentchat/contrib/vectordb/pgvectordb.py b/autogen/agentchat/contrib/vectordb/pgvectordb.py
@@ -4,16 +4,17 @@
 from typing import Callable, List, Optional, Union
 
 import numpy as np
+
+# try:
+import pgvector
+from pgvector.psycopg import register_vector
 from sentence_transformers import SentenceTransformer
 
 from .base import Document, ItemID, QueryResults, VectorDB
 from .utils import get_logger
 
-try:
-    import pgvector
-    from pgvector.psycopg import register_vector
-except ImportError:
-    raise ImportError("Please install pgvector: `pip install pgvector`")
+# except ImportError:
+#     raise ImportError("Please install pgvector: `pip install pgvector`")
 
 try:
     import psycopg
@@ -416,6 +417,7 @@ def query(
         results = []
         for query_text in query_texts:
             vector = self.embedding_function(query_text)
+            vector_string = "[" + ",".join([f"{x:.8f}" for x in vector]) + "]"
 
             if distance_type.lower() == "cosine":
                 index_function = "<=>"
@@ -428,7 +430,7 @@ def query(
             query = (
                 f"SELECT id, documents, embedding, metadatas "
                 f"FROM {self.name} "
-                f"{clause} embedding {index_function} '{str(vector)}' {distance_threshold} "
+                f"{clause} embedding {index_function} '{vector_string}' {distance_threshold} "
                 f"LIMIT {n_results}"
             )
             cursor.execute(query)
diff --git a/notebook/agentchat_MathChat.ipynb b/notebook/agentchat_MathChat.ipynb
@@ -57,9 +57,7 @@
     "    \"OAI_CONFIG_LIST\",\n",
     "    filter_dict={\n",
     "        \"model\": {\n",
-    "            \"gpt-4-1106-preview\",\n",
-    "            \"gpt-3.5-turbo\",\n",
-    "            \"gpt-35-turbo\",\n",
+    "            \"gpt-4o\",\n",
     "        }\n",
     "    },\n",
     ")"
diff --git a/notebook/agentchat_auto_feedback_from_code_execution.ipynb b/notebook/agentchat_auto_feedback_from_code_execution.ipynb
@@ -37,10 +37,10 @@
     "\n",
     "config_list = autogen.config_list_from_json(\n",
     "    \"OAI_CONFIG_LIST\",\n",
-    "    filter_dict={\"tags\": [\"gpt-4\"]},  # comment out to get all\n",
+    "    filter_dict={\"tags\": [\"gpt-4o\"]},  # comment out to get all\n",
     ")\n",
     "# When using a single openai endpoint, you can use the following:\n",
-    "# config_list = [{\"model\": \"gpt-4\", \"api_key\": os.getenv(\"OPENAI_API_KEY\")}]"
+    "# config_list = [{\"model\": \"gpt-4o\", \"api_key\": os.getenv(\"OPENAI_API_KEY\")}]"
    ]
   },
   {
diff --git a/notebook/agentchat_cost_token_tracking.ipynb b/notebook/agentchat_cost_token_tracking.ipynb
@@ -79,7 +79,7 @@
     "config_list = autogen.config_list_from_json(\n",
     "    \"OAI_CONFIG_LIST\",\n",
     "    filter_dict={\n",
-    "        \"model\": [\"gpt-3.5-turbo\", \"gpt-3.5-turbo-16k\"],  # comment out to get all\n",
+    "        \"model\": [\"gpt-3.5-turbo\"],  # comment out to get all\n",
     "    },\n",
     ")"
    ]
diff --git a/notebook/agentchat_function_call_currency_calculator.ipynb b/notebook/agentchat_function_call_currency_calculator.ipynb
@@ -65,7 +65,7 @@
     "\n",
     "config_list = autogen.config_list_from_json(\n",
     "    \"OAI_CONFIG_LIST\",\n",
-    "    filter_dict={\"tags\": [\"3.5-tool\"]},  # comment out to get all\n",
+    "    filter_dict={\"tags\": [\"tool\"]},  # comment out to get all\n",
     ")"
    ]
   },
diff --git a/notebook/agentchat_groupchat_finite_state_machine.ipynb b/notebook/agentchat_groupchat_finite_state_machine.ipynb
@@ -94,7 +94,7 @@
     "    \"cache_seed\": 44,  # change the seed for different trials\n",
     "    \"config_list\": autogen.config_list_from_json(\n",
     "        \"OAI_CONFIG_LIST\",\n",
-    "        filter_dict={\"tags\": [\"gpt-4\", \"gpt-4-32k\"]},  # comment out to get all\n",
+    "        filter_dict={\"tags\": [\"gpt-4o\"]},  # comment out to get all\n",
     "    ),\n",
     "    \"temperature\": 0,\n",
     "}"
diff --git a/notebook/agentchat_groupchat_stateflow.ipynb b/notebook/agentchat_groupchat_stateflow.ipynb
@@ -43,7 +43,7 @@
     "config_list = autogen.config_list_from_json(\n",
     "    \"OAI_CONFIG_LIST\",\n",
     "    filter_dict={\n",
-    "        \"tags\": [\"gpt-4\", \"gpt-4-32k\"],\n",
+    "        \"tags\": [\"gpt-4o\"],\n",
     "    },\n",
     ")"
    ]
diff --git a/test/agentchat/contrib/agent_eval/test_agent_eval.py b/test/agentchat/contrib/agent_eval/test_agent_eval.py
@@ -32,21 +32,15 @@ def remove_ground_truth(test_case: str):
         filter_dict={
             "api_type": ["openai"],
             "model": [
-                "gpt-4-turbo",
-                "gpt-4-turbo-preview",
-                "gpt-4-0125-preview",
-                "gpt-4-1106-preview",
+                "gpt-4o-mini",
                 "gpt-3.5-turbo",
-                "gpt-3.5-turbo-0125",
-                "gpt-3.5-turbo-1106",
             ],
         },
     )
 
     aoai_config_list = autogen.config_list_from_json(
         OAI_CONFIG_LIST,
         file_location=KEY_LOC,
-        filter_dict={"api_type": ["azure"]},
     )
 
     success_str = open("test/test_files/agenteval-in-out/samples/sample_math_response_successful.txt", "r").read()
diff --git a/test/agentchat/contrib/capabilities/chat_with_teachable_agent.py b/test/agentchat/contrib/capabilities/chat_with_teachable_agent.py
@@ -11,7 +11,8 @@
 from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST  # noqa: E402
 
 # Specify the model to use. GPT-3.5 is less reliable than GPT-4 at learning from user input.
-filter_dict = {"model": ["gpt-4-0125-preview"]}
+filter_dict = {"model": ["gpt-4o-mini"]}
+# filter_dict = {"model": ["gpt-4-0125-preview"]}
 # filter_dict = {"model": ["gpt-3.5-turbo-1106"]}
 # filter_dict = {"model": ["gpt-4-0613"]}
 # filter_dict = {"model": ["gpt-3.5-turbo"]}
diff --git a/test/agentchat/contrib/capabilities/test_image_generation_capability.py b/test/agentchat/contrib/capabilities/test_image_generation_capability.py
@@ -26,8 +26,6 @@
 sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
 from conftest import MOCK_OPEN_AI_API_KEY, skip_openai  # noqa: E402
 
-filter_dict = {"model": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]}
-
 RESOLUTIONS = ["256x256", "512x512", "1024x1024"]
 QUALITIES = ["standard", "hd"]
 PROMPTS = [
diff --git a/test/agentchat/contrib/capabilities/test_teachable_agent.py b/test/agentchat/contrib/capabilities/test_teachable_agent.py
@@ -28,7 +28,8 @@
 # filter_dict={"model": ["gpt-3.5-turbo-1106"]}
 # filter_dict={"model": ["gpt-3.5-turbo-0613"]}
 # filter_dict={"model": ["gpt-4"]}
-filter_dict = {"tags": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]}
+# filter_dict = {"tags": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]}
+filter_dict = {"model": ["gpt-4o-mini"]}
 
 
 def create_teachable_agent(reset_db=False, verbosity=0):
diff --git a/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py b/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py
@@ -78,7 +78,7 @@ def test_retrievechat():
             },
             "embedding_function": sentence_transformer_ef,
             "get_or_create": True,  # set to False if you don't want to reuse an existing collection
-            "overwrite": False,  # set to True if you want to overwrite an existing collection
+            "overwrite": True,  # set to True if you want to overwrite an existing collection
         },
         code_execution_config=False,  # set to False if you don't want to execute the code
     )
diff --git a/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py b/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py
@@ -69,6 +69,8 @@ def test_retrievechat():
             "client": client,
             "docs_path": "./website/docs",
             "chunk_token_size": 2000,
+            "get_or_create": True,
+            "overwrite": True,
         },
     )
 
diff --git a/test/agentchat/contrib/retrievechat/test_retrievechat.py b/test/agentchat/contrib/retrievechat/test_retrievechat.py
@@ -54,17 +54,19 @@ def test_retrievechat():
     )
 
     sentence_transformer_ef = ef.SentenceTransformerEmbeddingFunction()
+    docs_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../../website/docs"))
     ragproxyagent = RetrieveUserProxyAgent(
         name="ragproxyagent",
         human_input_mode="NEVER",
         max_consecutive_auto_reply=2,
         retrieve_config={
-            "docs_path": "./website/docs",
+            "docs_path": docs_path,
             "chunk_token_size": 2000,
             "model": config_list[0]["model"],
             "client": chromadb.PersistentClient(path="/tmp/chromadb"),
             "embedding_function": sentence_transformer_ef,
             "get_or_create": True,
+            "overwrite": True,
         },
     )
 
diff --git a/test/agentchat/contrib/test_agent_optimizer.py b/test/agentchat/contrib/test_agent_optimizer.py
@@ -89,7 +89,7 @@ def test_step():
         max_consecutive_auto_reply=3,
     )
 
-    optimizer = AgentOptimizer(max_actions_per_step=3, llm_config=llm_config)
+    optimizer = AgentOptimizer(max_actions_per_step=3, llm_config=llm_config, optimizer_model="gpt-4o-mini")
     user_proxy.initiate_chat(assistant, message=problem)
     optimizer.record_one_conversation(assistant.chat_messages_for_summary(user_proxy), is_satisfied=True)
 
diff --git a/test/agentchat/contrib/test_gpt_assistant.py b/test/agentchat/contrib/test_gpt_assistant.py
@@ -40,11 +40,12 @@
             ],
         },
     )
-    aoai_config_list = autogen.config_list_from_json(
-        OAI_CONFIG_LIST,
-        file_location=KEY_LOC,
-        filter_dict={"api_type": ["azure"], "tags": ["assistant"]},
-    )
+    # TODO: fix azure settings or remove it.
+    # aoai_config_list = autogen.config_list_from_json(
+    #     OAI_CONFIG_LIST,
+    #     file_location=KEY_LOC,
+    #     filter_dict={"api_type": ["azure"], "tags": ["assistant"]},
+    # )
 
 
 @pytest.mark.skipif(
@@ -53,17 +54,17 @@
 )
 def test_config_list() -> None:
     assert len(openai_config_list) > 0
-    assert len(aoai_config_list) > 0
+    # TODO: fix azure settings or remove it.
+    # assert len(aoai_config_list) > 0
 
 
 @pytest.mark.skipif(
     skip_openai,
     reason=reason,
 )
 def test_gpt_assistant_chat() -> None:
-    for gpt_config in [openai_config_list, aoai_config_list]:
-        _test_gpt_assistant_chat({"config_list": gpt_config})
-        _test_gpt_assistant_chat(gpt_config[0])
+    _test_gpt_assistant_chat({"config_list": openai_config_list})
+    _test_gpt_assistant_chat(openai_config_list[0])
 
 
 def _test_gpt_assistant_chat(gpt_config) -> None:
@@ -135,8 +136,8 @@ def ask_ossinsight(question: str) -> str:
     reason=reason,
 )
 def test_get_assistant_instructions() -> None:
-    for gpt_config in [openai_config_list, aoai_config_list]:
-        _test_get_assistant_instructions(gpt_config)
+    _test_get_assistant_instructions(openai_config_list)
+    # _test_get_assistant_instructions(aoai_config_list)
 
 
 def _test_get_assistant_instructions(gpt_config) -> None:
@@ -164,8 +165,8 @@ def _test_get_assistant_instructions(gpt_config) -> None:
     reason=reason,
 )
 def test_gpt_assistant_instructions_overwrite() -> None:
-    for gpt_config in [openai_config_list, aoai_config_list]:
-        _test_gpt_assistant_instructions_overwrite(gpt_config)
+    _test_gpt_assistant_instructions_overwrite(openai_config_list)
+    # _test_gpt_assistant_instructions_overwrite(aoai_config_list)
 
 
 def _test_gpt_assistant_instructions_overwrite(gpt_config) -> None:
diff --git a/test/agentchat/contrib/test_web_surfer.py b/test/agentchat/contrib/test_web_surfer.py
@@ -97,7 +97,7 @@ def test_web_surfer_oai() -> None:
     llm_config = {"config_list": config_list, "timeout": 180, "cache_seed": 42}
 
     # adding Azure name variations to the model list
-    model = ["gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-16k"]
+    model = ["gpt-4o-mini"]
     model += [m.replace(".", "") for m in model]
 
     summarizer_llm_config = {
diff --git a/test/agentchat/test_tool_calls.py b/test/agentchat/test_tool_calls.py
@@ -144,7 +144,7 @@ def test_update_tool():
     config_list_gpt4 = autogen.config_list_from_json(
         OAI_CONFIG_LIST,
         filter_dict={
-            "tags": ["gpt-4"],
+            "tags": ["gpt-4o-mini"],
         },
         file_location=KEY_LOC,
     )
diff --git a/test/io/test_websockets.py b/test/io/test_websockets.py
@@ -97,14 +97,8 @@ def on_connect(iostream: IOWebsockets, success_dict: Dict[str, bool] = success_d
                 OAI_CONFIG_LIST,
                 filter_dict={
                     "model": [
+                        "gpt-4o-mini",
                         "gpt-3.5-turbo",
-                        "gpt-3.5-turbo-16k",
-                        "gpt-4",
-                        "gpt-4-0314",
-                        "gpt4",
-                        "gpt-4-32k",
-                        "gpt-4-32k-0314",
-                        "gpt-4-32k-v0314",
                     ],
                 },
                 file_location=KEY_LOC,
diff --git a/test/oai/_test_completion.py b/test/oai/_test_completion.py
@@ -143,13 +143,8 @@ def test_nocontext():
             file_location=KEY_LOC,
             filter_dict={
                 "model": {
+                    "gpt-4o-mini",
                     "gpt-3.5-turbo",
-                    "gpt-3.5-turbo-16k",
-                    "gpt-3.5-turbo-16k-0613",
-                    "gpt-3.5-turbo-0301",
-                    "chatgpt-35-turbo-0301",
-                    "gpt-35-turbo-v0301",
-                    "gpt",
                 },
             },
         ),
@@ -179,13 +174,8 @@ def test_humaneval(num_samples=1):
         env_or_file=OAI_CONFIG_LIST,
         filter_dict={
             "model": {
+                "gpt-4o-mini",
                 "gpt-3.5-turbo",
-                "gpt-3.5-turbo-16k",
-                "gpt-3.5-turbo-16k-0613",
-                "gpt-3.5-turbo-0301",
-                "chatgpt-35-turbo-0301",
-                "gpt-35-turbo-v0301",
-                "gpt",
             },
         },
         file_location=KEY_LOC,
diff --git a/test/oai/test_client.py b/test/oai/test_client.py
@@ -66,7 +66,8 @@ def get_usage(response):
         return {}
 
 
-@pytest.mark.skipif(skip, reason="openai>=1 not installed")
+# @pytest.mark.skipif(skip, reason="openai>=1 not installed")
+@pytest.mark.skip(reason="This test is not working until Azure settings are updated")
 def test_aoai_chat_completion():
     config_list = config_list_from_json(
         env_or_file=OAI_CONFIG_LIST,
@@ -88,7 +89,8 @@ def test_aoai_chat_completion():
     print(client.extract_text_or_completion_object(response))
 
 
-@pytest.mark.skipif(skip or not TOOL_ENABLED, reason="openai>=1.1.0 not installed")
+# @pytest.mark.skipif(skip or not TOOL_ENABLED, reason="openai>=1.1.0 not installed")
+@pytest.mark.skip(reason="This test is not working until Azure settings are updated")
 def test_oai_tool_calling_extraction():
     config_list = config_list_from_json(
         env_or_file=OAI_CONFIG_LIST,
diff --git a/test/oai/test_client_stream.py b/test/oai/test_client_stream.py
@@ -33,6 +33,7 @@
 
 
 @pytest.mark.skipif(skip, reason="openai>=1 not installed")
+@pytest.mark.skip(reason="This test is not working until Azure settings are updated.")
 def test_aoai_chat_completion_stream() -> None:
     config_list = config_list_from_json(
         env_or_file=OAI_CONFIG_LIST,
@@ -236,7 +237,7 @@ def test_chat_tools_stream() -> None:
     config_list = config_list_from_json(
         env_or_file=OAI_CONFIG_LIST,
         file_location=KEY_LOC,
-        filter_dict={"tags": ["multitool"]},
+        filter_dict={"tags": ["tool"]},
     )
     tools = [
         {

Original file line number	Diff line number	Diff line change
`@@ -37,10 +37,10 @@`
`37`	`37`	`"\n",`
`38`	`38`	`"config_list = autogen.config_list_from_json(\n",`
`39`	`39`	`" \"OAI_CONFIG_LIST\",\n",`
`40`		`- " filter_dict={\"tags\": [\"gpt-4\"]}, # comment out to get all\n",`
	`40`	`+ " filter_dict={\"tags\": [\"gpt-4o\"]}, # comment out to get all\n",`
`41`	`41`	`")\n",`
`42`	`42`	`"# When using a single openai endpoint, you can use the following:\n",`
`43`		`- "# config_list = [{\"model\": \"gpt-4\", \"api_key\": os.getenv(\"OPENAI_API_KEY\")}]"`
	`43`	`+ "# config_list = [{\"model\": \"gpt-4o\", \"api_key\": os.getenv(\"OPENAI_API_KEY\")}]"`
`44`	`44`	`]`
`45`	`45`	`},`
`46`	`46`	`{`
Original file line number	Diff line number	Diff line change
`@@ -79,7 +79,7 @@`
`79`	`79`	`"config_list = autogen.config_list_from_json(\n",`
`80`	`80`	`" \"OAI_CONFIG_LIST\",\n",`
`81`	`81`	`" filter_dict={\n",`
`82`		`- " \"model\": [\"gpt-3.5-turbo\", \"gpt-3.5-turbo-16k\"], # comment out to get all\n",`
	`82`	`+ " \"model\": [\"gpt-3.5-turbo\"], # comment out to get all\n",`
`83`	`83`	`" },\n",`
`84`	`84`	`")"`
`85`	`85`	`]`
Original file line number	Diff line number	Diff line change
`@@ -65,7 +65,7 @@`
`65`	`65`	`"\n",`
`66`	`66`	`"config_list = autogen.config_list_from_json(\n",`
`67`	`67`	`" \"OAI_CONFIG_LIST\",\n",`
`68`		`- " filter_dict={\"tags\": [\"3.5-tool\"]}, # comment out to get all\n",`
	`68`	`+ " filter_dict={\"tags\": [\"tool\"]}, # comment out to get all\n",`
`69`	`69`	`")"`
`70`	`70`	`]`
`71`	`71`	`},`
Original file line number	Diff line number	Diff line change
`@@ -43,7 +43,7 @@`
`43`	`43`	`"config_list = autogen.config_list_from_json(\n",`
`44`	`44`	`" \"OAI_CONFIG_LIST\",\n",`
`45`	`45`	`" filter_dict={\n",`
`46`		`- " \"tags\": [\"gpt-4\", \"gpt-4-32k\"],\n",`
	`46`	`+ " \"tags\": [\"gpt-4o\"],\n",`
`47`	`47`	`" },\n",`
`48`	`48`	`")"`
`49`	`49`	`]`
Original file line number	Diff line number	Diff line change
`@@ -78,7 +78,7 @@ def test_retrievechat():`
`78`	`78`	`},`
`79`	`79`	`"embedding_function": sentence_transformer_ef,`
`80`	`80`	`"get_or_create": True, # set to False if you don't want to reuse an existing collection`
`81`		`- "overwrite": False, # set to True if you want to overwrite an existing collection`
	`81`	`+ "overwrite": True, # set to True if you want to overwrite an existing collection`
`82`	`82`	`},`
`83`	`83`	`code_execution_config=False, # set to False if you don't want to execute the code`
`84`	`84`	`)`
Original file line number	Diff line number	Diff line change
`@@ -69,6 +69,8 @@ def test_retrievechat():`
`69`	`69`	`"client": client,`
`70`	`70`	`"docs_path": "./website/docs",`
`71`	`71`	`"chunk_token_size": 2000,`
	`72`	`+ "get_or_create": True,`
	`73`	`+ "overwrite": True,`
`72`	`74`	`},`
`73`	`75`	`)`
`74`	`76`
Original file line number	Diff line number	Diff line change
`@@ -89,7 +89,7 @@ def test_step():`
`89`	`89`	`max_consecutive_auto_reply=3,`
`90`	`90`	`)`
`91`	`91`
`92`		`- optimizer = AgentOptimizer(max_actions_per_step=3, llm_config=llm_config)`
	`92`	`+ optimizer = AgentOptimizer(max_actions_per_step=3, llm_config=llm_config, optimizer_model="gpt-4o-mini")`
`93`	`93`	`user_proxy.initiate_chat(assistant, message=problem)`
`94`	`94`	`optimizer.record_one_conversation(assistant.chat_messages_for_summary(user_proxy), is_satisfied=True)`
`95`	`95`
Original file line number	Diff line number	Diff line change
`@@ -144,7 +144,7 @@ def test_update_tool():`
`144`	`144`	`config_list_gpt4 = autogen.config_list_from_json(`
`145`	`145`	`OAI_CONFIG_LIST,`
`146`	`146`	`filter_dict={`
`147`		`- "tags": ["gpt-4"],`
	`147`	`+ "tags": ["gpt-4o-mini"],`
`148`	`148`	`},`
`149`	`149`	`file_location=KEY_LOC,`
`150`	`150`	`)`
Original file line number	Diff line number	Diff line change
`@@ -33,6 +33,7 @@`
`33`	`33`
`34`	`34`
`35`	`35`	`@pytest.mark.skipif(skip, reason="openai>=1 not installed")`
	`36`	`+@pytest.mark.skip(reason="This test is not working until Azure settings are updated.")`
`36`	`37`	`def test_aoai_chat_completion_stream() -> None:`
`37`	`38`	`config_list = config_list_from_json(`
`38`	`39`	`env_or_file=OAI_CONFIG_LIST,`
`@@ -236,7 +237,7 @@ def test_chat_tools_stream() -> None:`
`236`	`237`	`config_list = config_list_from_json(`
`237`	`238`	`env_or_file=OAI_CONFIG_LIST,`
`238`	`239`	`file_location=KEY_LOC,`
`239`		`- filter_dict={"tags": ["multitool"]},`
	`240`	`+ filter_dict={"tags": ["tool"]},`
`240`	`241`	`)`
`241`	`242`	`tools = [`
`242`	`243`	`{`