From 6d4cf406f9d1a32689892147257fa8cc0a781a48 Mon Sep 17 00:00:00 2001
From: Qingyun Wu <qingyun0327@gmail.com>
Date: Fri, 14 Jun 2024 11:58:17 -0400
Subject: [PATCH] Filter models with tags instead of model name (#2912)

* identify model with tags instead of model name

* models

* model to tag

* add more model names

* format

* Update test/agentchat/test_function_call.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/agentchat/test_function_call.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/agentchat/test_tool_calls.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/agentchat/test_tool_calls.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* remove unnecessary tags

* use gpt-4 as tag

* model to tag

* add tag for teachable agent test

---------

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: AutoGen-Hub <flaml20201204@gmail.com>
---
 .../capabilities/test_context_handling.py     |  4 +---
 .../capabilities/test_teachable_agent.py      |  2 +-
 test/agentchat/test_agent_logging.py          |  2 +-
 test/agentchat/test_conversable_agent.py      | 24 ++++++++++---------
 test/agentchat/test_function_call.py          | 10 +++++---
 test/agentchat/test_tool_calls.py             | 18 ++++++++++----
 test/oai/test_client.py                       | 20 ++++++++++------
 test/oai/test_client_stream.py                |  6 ++---
 8 files changed, 52 insertions(+), 34 deletions(-)

diff --git a/test/agentchat/contrib/capabilities/test_context_handling.py b/test/agentchat/contrib/capabilities/test_context_handling.py
index 17cd7e9a655d..8cb1b60aff4e 100755
--- a/test/agentchat/contrib/capabilities/test_context_handling.py
+++ b/test/agentchat/contrib/capabilities/test_context_handling.py
@@ -73,9 +73,7 @@ def test_transform_chat_history_with_agents():
     config_list = autogen.config_list_from_json(
         OAI_CONFIG_LIST,
         KEY_LOC,
-        filter_dict={
-            "model": "gpt-3.5-turbo",
-        },
+        filter_dict={"tags": ["gpt-3.5-turbo"]},
     )
     assistant = AssistantAgent("assistant", llm_config={"config_list": config_list}, max_consecutive_auto_reply=1)
     context_handling = TransformChatHistory(max_messages=10, max_tokens_per_message=5, max_tokens=1000)
diff --git a/test/agentchat/contrib/capabilities/test_teachable_agent.py b/test/agentchat/contrib/capabilities/test_teachable_agent.py
index 261bd4801a4c..720bdc7ef6d2 100755
--- a/test/agentchat/contrib/capabilities/test_teachable_agent.py
+++ b/test/agentchat/contrib/capabilities/test_teachable_agent.py
@@ -28,7 +28,7 @@
 # filter_dict={"model": ["gpt-3.5-turbo-1106"]}
 # filter_dict={"model": ["gpt-3.5-turbo-0613"]}
 # filter_dict={"model": ["gpt-4"]}
-filter_dict = {"model": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]}
+filter_dict = {"tags": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]}
 
 
 def create_teachable_agent(reset_db=False, verbosity=0):
diff --git a/test/agentchat/test_agent_logging.py b/test/agentchat/test_agent_logging.py
index 8375a08444bf..3e479c0bba6e 100644
--- a/test/agentchat/test_agent_logging.py
+++ b/test/agentchat/test_agent_logging.py
@@ -44,7 +44,7 @@
     config_list = autogen.config_list_from_json(
         OAI_CONFIG_LIST,
         filter_dict={
-            "model": ["gpt-3.5-turbo"],
+            "tags": ["gpt-3.5-turbo"],
         },
         file_location=KEY_LOC,
     )
diff --git a/test/agentchat/test_conversable_agent.py b/test/agentchat/test_conversable_agent.py
index 3c2e79beb136..0a8d1daebc80 100755
--- a/test/agentchat/test_conversable_agent.py
+++ b/test/agentchat/test_conversable_agent.py
@@ -25,6 +25,8 @@
 
 here = os.path.abspath(os.path.dirname(__file__))
 
+gpt4_config_list = [{"model": "gpt-4"}, {"model": "gpt-4-turbo"}, {"model": "gpt-4-32k"}, {"model": "gpt-4o"}]
+
 
 @pytest.fixture
 def conversable_agent():
@@ -502,7 +504,7 @@ async def test_a_generate_reply_with_messages_and_sender_none(conversable_agent)
 def test_update_function_signature_and_register_functions() -> None:
     with pytest.MonkeyPatch.context() as mp:
         mp.setenv("OPENAI_API_KEY", MOCK_OPEN_AI_API_KEY)
-        agent = ConversableAgent(name="agent", llm_config={"config_list": [{"model": "gpt-4"}]})
+        agent = ConversableAgent(name="agent", llm_config={"config_list": gpt4_config_list})
 
         def exec_python(cell: str) -> None:
             pass
@@ -656,9 +658,9 @@ def get_origin(d: Dict[str, Callable[..., Any]]) -> Dict[str, Callable[..., Any]
 def test_register_for_llm():
     with pytest.MonkeyPatch.context() as mp:
         mp.setenv("OPENAI_API_KEY", MOCK_OPEN_AI_API_KEY)
-        agent3 = ConversableAgent(name="agent3", llm_config={"config_list": [{"model": "gpt-4"}]})
-        agent2 = ConversableAgent(name="agent2", llm_config={"config_list": [{"model": "gpt-4"}]})
-        agent1 = ConversableAgent(name="agent1", llm_config={"config_list": [{"model": "gpt-4"}]})
+        agent3 = ConversableAgent(name="agent3", llm_config={"config_list": gpt4_config_list})
+        agent2 = ConversableAgent(name="agent2", llm_config={"config_list": gpt4_config_list})
+        agent1 = ConversableAgent(name="agent1", llm_config={"config_list": gpt4_config_list})
 
         @agent3.register_for_llm()
         @agent2.register_for_llm(name="python")
@@ -729,9 +731,9 @@ async def exec_sh(script: Annotated[str, "Valid shell script to execute."]) -> s
 def test_register_for_llm_api_style_function():
     with pytest.MonkeyPatch.context() as mp:
         mp.setenv("OPENAI_API_KEY", MOCK_OPEN_AI_API_KEY)
-        agent3 = ConversableAgent(name="agent3", llm_config={"config_list": [{"model": "gpt-4"}]})
-        agent2 = ConversableAgent(name="agent2", llm_config={"config_list": [{"model": "gpt-4"}]})
-        agent1 = ConversableAgent(name="agent1", llm_config={"config_list": [{"model": "gpt-4"}]})
+        agent3 = ConversableAgent(name="agent3", llm_config={"config_list": gpt4_config_list})
+        agent2 = ConversableAgent(name="agent2", llm_config={"config_list": gpt4_config_list})
+        agent1 = ConversableAgent(name="agent1", llm_config={"config_list": gpt4_config_list})
 
         @agent3.register_for_llm(api_style="function")
         @agent2.register_for_llm(name="python", api_style="function")
@@ -800,7 +802,7 @@ async def exec_sh(script: Annotated[str, "Valid shell script to execute."]) -> s
 def test_register_for_llm_without_description():
     with pytest.MonkeyPatch.context() as mp:
         mp.setenv("OPENAI_API_KEY", MOCK_OPEN_AI_API_KEY)
-        agent = ConversableAgent(name="agent", llm_config={"config_list": [{"model": "gpt-4"}]})
+        agent = ConversableAgent(name="agent", llm_config={"config_list": gpt4_config_list})
 
         with pytest.raises(ValueError) as e:
 
@@ -877,7 +879,7 @@ async def exec_sh(script: Annotated[str, "Valid shell script to execute."]):
 def test_register_functions():
     with pytest.MonkeyPatch.context() as mp:
         mp.setenv("OPENAI_API_KEY", MOCK_OPEN_AI_API_KEY)
-        agent = ConversableAgent(name="agent", llm_config={"config_list": [{"model": "gpt-4"}]})
+        agent = ConversableAgent(name="agent", llm_config={"config_list": gpt4_config_list})
         user_proxy = UserProxyAgent(name="user_proxy")
 
         def exec_python(cell: Annotated[str, "Valid Python cell to execute."]) -> str:
@@ -1001,7 +1003,7 @@ async def test_function_registration_e2e_async() -> None:
     config_list = autogen.config_list_from_json(
         OAI_CONFIG_LIST,
         filter_dict={
-            "model": ["gpt-4", "gpt-4-0314", "gpt4", "gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-v0314"],
+            "tags": ["gpt-4", "gpt-4-0314", "gpt4", "gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-v0314"],
         },
         file_location=KEY_LOC,
     )
@@ -1072,7 +1074,7 @@ def stopwatch(num_seconds: Annotated[str, "Number of seconds in the stopwatch."]
 
 @pytest.mark.skipif(skip_openai, reason=reason)
 def test_max_turn():
-    config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, KEY_LOC, filter_dict={"model": ["gpt-3.5-turbo"]})
+    config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, KEY_LOC, filter_dict={"tags": ["gpt-3.5-turbo"]})
 
     # create an AssistantAgent instance named "assistant"
     assistant = autogen.AssistantAgent(
diff --git a/test/agentchat/test_function_call.py b/test/agentchat/test_function_call.py
index 793af6a30617..d3e174949b4b 100755
--- a/test/agentchat/test_function_call.py
+++ b/test/agentchat/test_function_call.py
@@ -24,8 +24,12 @@
 
 @pytest.mark.skipif(skip, reason="openai not installed OR requested to skip")
 def test_eval_math_responses():
-    config_list = autogen.config_list_from_models(
-        KEY_LOC, model_list=["gpt-4-0613", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k"]
+    config_list = autogen.config_list_from_json(
+        OAI_CONFIG_LIST,
+        filter_dict={
+            "tags": ["gpt-4", "gpt-3.5-turbo", "gpt-3.5-turbo-16k"],
+        },
+        file_location=KEY_LOC,
     )
     functions = [
         {
@@ -209,7 +213,7 @@ def test_update_function():
     config_list_gpt4 = autogen.config_list_from_json(
         OAI_CONFIG_LIST,
         filter_dict={
-            "model": ["gpt-4", "gpt-4-0314", "gpt4", "gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-v0314"],
+            "tags": ["gpt-4", "gpt-4-32k", "gpt-4o"],
         },
         file_location=KEY_LOC,
     )
diff --git a/test/agentchat/test_tool_calls.py b/test/agentchat/test_tool_calls.py
index 710ab14f5f01..6a12d2d96edf 100755
--- a/test/agentchat/test_tool_calls.py
+++ b/test/agentchat/test_tool_calls.py
@@ -23,8 +23,10 @@
 
 @pytest.mark.skipif(skip_openai or not TOOL_ENABLED, reason="openai>=1.1.0 not installed or requested to skip")
 def test_eval_math_responses():
-    config_list = autogen.config_list_from_models(
-        KEY_LOC, model_list=["gpt-4-0613", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k"]
+    config_list = autogen.config_list_from_json(
+        OAI_CONFIG_LIST,
+        KEY_LOC,
+        filter_dict={"tags": ["tool"]},
     )
     tools = [
         {
@@ -78,9 +80,15 @@ def test_eval_math_responses():
 
 @pytest.mark.skipif(skip_openai or not TOOL_ENABLED, reason="openai>=1.1.0 not installed or requested to skip")
 def test_eval_math_responses_api_style_function():
-    config_list = autogen.config_list_from_models(
+    # config_list = autogen.config_list_from_models(
+    #     KEY_LOC,
+    #     model_list=["gpt-4-0613", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k"],
+    # )
+
+    config_list = autogen.config_list_from_json(
+        OAI_CONFIG_LIST,
         KEY_LOC,
-        model_list=["gpt-4-0613", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k"],
+        filter_dict={"tags": ["tool"]},
     )
     functions = [
         {
@@ -136,7 +144,7 @@ def test_update_tool():
     config_list_gpt4 = autogen.config_list_from_json(
         OAI_CONFIG_LIST,
         filter_dict={
-            "model": ["gpt-4", "gpt-4-0314", "gpt4", "gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-v0314"],
+            "tags": ["gpt-4"],
         },
         file_location=KEY_LOC,
     )
diff --git a/test/oai/test_client.py b/test/oai/test_client.py
index d4878d640e5c..debad5fae3b0 100755
--- a/test/oai/test_client.py
+++ b/test/oai/test_client.py
@@ -36,7 +36,7 @@ def test_aoai_chat_completion():
     config_list = config_list_from_json(
         env_or_file=OAI_CONFIG_LIST,
         file_location=KEY_LOC,
-        filter_dict={"api_type": ["azure"], "model": ["gpt-3.5-turbo", "gpt-35-turbo"]},
+        filter_dict={"api_type": ["azure"], "tags": ["gpt-3.5-turbo"]},
     )
     client = OpenAIWrapper(config_list=config_list)
     response = client.create(messages=[{"role": "user", "content": "2+2="}], cache_seed=None)
@@ -58,7 +58,7 @@ def test_oai_tool_calling_extraction():
     config_list = config_list_from_json(
         env_or_file=OAI_CONFIG_LIST,
         file_location=KEY_LOC,
-        filter_dict={"api_type": ["azure"], "model": ["gpt-3.5-turbo", "gpt-35-turbo"]},
+        filter_dict={"api_type": ["azure"], "tags": ["gpt-3.5-turbo"]},
     )
     client = OpenAIWrapper(config_list=config_list)
     response = client.create(
@@ -105,7 +105,9 @@ def test_chat_completion():
 @pytest.mark.skipif(skip, reason="openai>=1 not installed")
 def test_completion():
     config_list = config_list_from_json(
-        env_or_file=OAI_CONFIG_LIST, file_location=KEY_LOC, filter_dict={"tags": ["gpt-3.5-turbo-instruct"]}
+        env_or_file=OAI_CONFIG_LIST,
+        file_location=KEY_LOC,
+        filter_dict={"tags": ["gpt-35-turbo-instruct", "gpt-3.5-turbo-instruct"]},
     )
     client = OpenAIWrapper(config_list=config_list)
     response = client.create(prompt="1+1=")
@@ -123,7 +125,9 @@ def test_completion():
 )
 def test_cost(cache_seed):
     config_list = config_list_from_json(
-        env_or_file=OAI_CONFIG_LIST, file_location=KEY_LOC, filter_dict={"tags": ["gpt-3.5-turbo-instruct"]}
+        env_or_file=OAI_CONFIG_LIST,
+        file_location=KEY_LOC,
+        filter_dict={"tags": ["gpt-35-turbo-instruct", "gpt-3.5-turbo-instruct"]},
     )
     client = OpenAIWrapper(config_list=config_list, cache_seed=cache_seed)
     response = client.create(prompt="1+3=")
@@ -145,7 +149,9 @@ def test_customized_cost():
 @pytest.mark.skipif(skip, reason="openai>=1 not installed")
 def test_usage_summary():
     config_list = config_list_from_json(
-        env_or_file=OAI_CONFIG_LIST, file_location=KEY_LOC, filter_dict={"tags": ["gpt-3.5-turbo-instruct"]}
+        env_or_file=OAI_CONFIG_LIST,
+        file_location=KEY_LOC,
+        filter_dict={"tags": ["gpt-35-turbo-instruct", "gpt-3.5-turbo-instruct"]},
     )
     client = OpenAIWrapper(config_list=config_list)
     response = client.create(prompt="1+3=", cache_seed=None)
@@ -181,7 +187,7 @@ def test_legacy_cache():
     config_list = config_list_from_json(
         env_or_file=OAI_CONFIG_LIST,
         file_location=KEY_LOC,
-        filter_dict={"model": ["gpt-3.5-turbo", "gpt-35-turbo"]},
+        filter_dict={"tags": ["gpt-3.5-turbo"]},
     )
 
     # Prompt to use for testing.
@@ -250,7 +256,7 @@ def test_cache():
     config_list = config_list_from_json(
         env_or_file=OAI_CONFIG_LIST,
         file_location=KEY_LOC,
-        filter_dict={"model": ["gpt-3.5-turbo", "gpt-35-turbo"]},
+        filter_dict={"tags": ["gpt-3.5-turbo"]},
     )
 
     # Prompt to use for testing.
diff --git a/test/oai/test_client_stream.py b/test/oai/test_client_stream.py
index 1e0f3055d099..59abd97151ad 100755
--- a/test/oai/test_client_stream.py
+++ b/test/oai/test_client_stream.py
@@ -37,7 +37,7 @@ def test_aoai_chat_completion_stream() -> None:
     config_list = config_list_from_json(
         env_or_file=OAI_CONFIG_LIST,
         file_location=KEY_LOC,
-        filter_dict={"api_type": ["azure"], "model": ["gpt-3.5-turbo", "gpt-35-turbo"]},
+        filter_dict={"api_type": ["azure"], "tags": ["gpt-3.5-turbo"]},
     )
     client = OpenAIWrapper(config_list=config_list)
     response = client.create(messages=[{"role": "user", "content": "2+2="}], stream=True)
@@ -50,7 +50,7 @@ def test_chat_completion_stream() -> None:
     config_list = config_list_from_json(
         env_or_file=OAI_CONFIG_LIST,
         file_location=KEY_LOC,
-        filter_dict={"model": ["gpt-3.5-turbo", "gpt-35-turbo"]},
+        filter_dict={"tags": ["gpt-3.5-turbo"]},
     )
     client = OpenAIWrapper(config_list=config_list)
     response = client.create(messages=[{"role": "user", "content": "1+1="}], stream=True)
@@ -202,7 +202,7 @@ def test_chat_functions_stream() -> None:
     config_list = config_list_from_json(
         env_or_file=OAI_CONFIG_LIST,
         file_location=KEY_LOC,
-        filter_dict={"model": ["gpt-3.5-turbo", "gpt-35-turbo"]},
+        filter_dict={"tags": ["gpt-3.5-turbo"]},
     )
     functions = [
         {