Skip to content

TEST/change gpt-4, gpt-4o series to gpt-4.1-nano #6375

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions python/packages/autogen-agentchat/tests/test_group_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ async def test_round_robin_group_chat_with_tools(runtime: AgentRuntime | None) -
"TERMINATE",
],
model_info={
"family": "gpt-4o",
"family": "gpt-4.1-nano",
"function_calling": True,
"json_output": True,
"vision": True,
Expand Down Expand Up @@ -1272,7 +1272,7 @@ async def test_swarm_handoff_using_tool_calls(runtime: AgentRuntime | None) -> N
"TERMINATE",
],
model_info={
"family": "gpt-4o",
"family": "gpt-4.1-nano",
"function_calling": True,
"json_output": True,
"vision": True,
Expand Down Expand Up @@ -1372,7 +1372,7 @@ async def test_swarm_with_parallel_tool_calls(runtime: AgentRuntime | None) -> N
"TERMINATE",
],
model_info={
"family": "gpt-4o",
"family": "gpt-4.1-nano",
"function_calling": True,
"json_output": True,
"vision": True,
Expand Down Expand Up @@ -1562,12 +1562,14 @@ async def test_declarative_groupchats_with_config(runtime: AgentRuntime | None)
# Create basic agents and components for testing
agent1 = AssistantAgent(
"agent_1",
model_client=OpenAIChatCompletionClient(model="gpt-4o-2024-05-13", api_key=""),
model_client=OpenAIChatCompletionClient(model="gpt-4.1-nano-2025-04-14", api_key=""),
handoffs=["agent_2"],
)
agent2 = AssistantAgent("agent_2", model_client=OpenAIChatCompletionClient(model="gpt-4o-2024-05-13", api_key=""))
agent2 = AssistantAgent(
"agent_2", model_client=OpenAIChatCompletionClient(model="gpt-4.1-nano-2025-04-14", api_key="")
)
termination = MaxMessageTermination(4)
model_client = OpenAIChatCompletionClient(model="gpt-4o-2024-05-13", api_key="")
model_client = OpenAIChatCompletionClient(model="gpt-4.1-nano-2025-04-14", api_key="")

# Test round robin - verify config is preserved
round_robin = RoundRobinGroupChat(participants=[agent1, agent2], termination_condition=termination, max_turns=5)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ async def test_selector_group_chat_openai() -> None:
pytest.skip("OPENAI_API_KEY not set in environment variables.")

model_client = OpenAIChatCompletionClient(
model="gpt-4o-mini",
model="gpt-4.1-nano",
api_key=api_key,
)
await _test_selector_group_chat(model_client)
Expand Down
6 changes: 3 additions & 3 deletions python/packages/autogen-core/tests/test_model_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ async def test_unbounded_model_context() -> None:
@pytest.mark.parametrize(
"model_client,token_limit",
[
(OpenAIChatCompletionClient(model="gpt-4o", temperature=0.0, api_key="test"), 30),
(OpenAIChatCompletionClient(model="gpt-4.1-nano", temperature=0.0, api_key="test"), 30),
(OllamaChatCompletionClient(model="llama3.3"), 20),
],
ids=["openai", "ollama"],
Expand Down Expand Up @@ -159,7 +159,7 @@ async def test_token_limited_model_context_with_token_limit(
@pytest.mark.parametrize(
"model_client",
[
OpenAIChatCompletionClient(model="gpt-4o", temperature=0.0, api_key="test_key"),
OpenAIChatCompletionClient(model="gpt-4.1-nano", temperature=0.0, api_key="test_key"),
OllamaChatCompletionClient(model="llama3.3"),
],
ids=["openai", "ollama"],
Expand All @@ -182,7 +182,7 @@ async def test_token_limited_model_context_without_token_limit(model_client: Cha
@pytest.mark.parametrize(
"model_client,token_limit",
[
(OpenAIChatCompletionClient(model="gpt-4o", temperature=0.0, api_key="test"), 60),
(OpenAIChatCompletionClient(model="gpt-4.1-nano", temperature=0.0, api_key="test"), 60),
(OllamaChatCompletionClient(model="llama3.3"), 50),
],
ids=["openai", "ollama"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class MockChunkEvent(BaseModel):


async def _mock_create_stream(*args: Any, **kwargs: Any) -> AsyncGenerator[ChatCompletionChunk, None]:
model = resolve_model(kwargs.get("model", "gpt-4o"))
model = resolve_model(kwargs.get("model", "gpt-4.1-nano"))
mock_chunks_content = ["Hello", " Another Hello", " Yet Another Hello"]

# The openai api implementations (OpenAI and Litellm) stream chunks of tokens
Expand Down Expand Up @@ -167,7 +167,7 @@ async def _mock_create_stream(*args: Any, **kwargs: Any) -> AsyncGenerator[ChatC

async def _mock_create(*args: Any, **kwargs: Any) -> ChatCompletion | AsyncGenerator[ChatCompletionChunk, None]:
stream = kwargs.get("stream", False)
model = resolve_model(kwargs.get("model", "gpt-4o"))
model = resolve_model(kwargs.get("model", "gpt-4.1-nano"))
if not stream:
await asyncio.sleep(0.1)
return ChatCompletion(
Expand All @@ -186,7 +186,7 @@ async def _mock_create(*args: Any, **kwargs: Any) -> ChatCompletion | AsyncGener

@pytest.mark.asyncio
async def test_openai_chat_completion_client() -> None:
client = OpenAIChatCompletionClient(model="gpt-4o", api_key="api_key")
client = OpenAIChatCompletionClient(model="gpt-4.1-nano", api_key="api_key")
assert client


Expand All @@ -198,7 +198,7 @@ async def test_openai_chat_completion_client_with_gemini_model() -> None:

@pytest.mark.asyncio
async def test_openai_chat_completion_client_serialization() -> None:
client = OpenAIChatCompletionClient(model="gpt-4o", api_key="sk-password")
client = OpenAIChatCompletionClient(model="gpt-4.1-nano", api_key="sk-password")
assert client
config = client.dump_component()
assert config
Expand Down Expand Up @@ -467,7 +467,7 @@ async def run(self, args: MyArgs, cancellation_token: CancellationToken) -> MyRe

@pytest.mark.asyncio
async def test_json_mode(monkeypatch: pytest.MonkeyPatch) -> None:
model = "gpt-4o-2024-11-20"
model = "gpt-4.1-nano-2025-04-14"

called_args = {}

Expand Down Expand Up @@ -562,7 +562,7 @@ class AgentResponse(BaseModel):
thoughts: str
response: Literal["happy", "sad", "neutral"]

model = "gpt-4o-2024-11-20"
model = "gpt-4.1-nano-2025-04-14"

called_args = {}

Expand Down Expand Up @@ -654,7 +654,7 @@ class AgentResponse(BaseModel):
thoughts: str
response: Literal["happy", "sad", "neutral"]

model = "gpt-4o-2024-11-20"
model = "gpt-4.1-nano-2025-04-14"

async def _mock_parse(*args: Any, **kwargs: Any) -> ParsedChatCompletion[AgentResponse]:
return ParsedChatCompletion(
Expand Down Expand Up @@ -737,7 +737,7 @@ class AgentResponse(BaseModel):
thoughts: str
response: Literal["happy", "sad", "neutral"]

model = "gpt-4o-2024-11-20"
model = "gpt-4.1-nano-2025-04-14"

async def _mock_parse(*args: Any, **kwargs: Any) -> ParsedChatCompletion[AgentResponse]:
return ParsedChatCompletion(
Expand Down Expand Up @@ -813,7 +813,7 @@ class AgentResponse(BaseModel):
chunked_content = [raw_content[i : i + 5] for i in range(0, len(raw_content), 5)]
assert "".join(chunked_content) == raw_content

model = "gpt-4o-2024-11-20"
model = "gpt-4.1-nano-2025-04-14"
mock_chunk_events = [
MockChunkEvent(
type="chunk",
Expand Down Expand Up @@ -886,7 +886,7 @@ class AgentResponse(BaseModel):
chunked_content = [raw_content[i : i + 5] for i in range(0, len(raw_content), 5)]
assert "".join(chunked_content) == raw_content

model = "gpt-4o-2024-11-20"
model = "gpt-4.1-nano-2025-04-14"

# generate the list of mock chunk content
mock_chunk_events = [
Expand Down Expand Up @@ -1265,7 +1265,7 @@ async def _mock_create(*args: Any, **kwargs: Any) -> ChatCompletion | AsyncGener

@pytest.mark.asyncio
async def test_tool_calling(monkeypatch: pytest.MonkeyPatch) -> None:
model = "gpt-4o-2024-05-13"
model = "gpt-4.1-nano-2025-04-14"
chat_completions = [
# Successful completion, single tool call
ChatCompletion(
Expand Down Expand Up @@ -1622,7 +1622,7 @@ def openai_client(request: pytest.FixtureRequest) -> OpenAIChatCompletionClient:
@pytest.mark.asyncio
@pytest.mark.parametrize(
"model",
["gpt-4o-mini", "gemini-1.5-flash", "claude-3-5-haiku-20241022"],
["gpt-4.1-nano", "gemini-1.5-flash", "claude-3-5-haiku-20241022"],
)
async def test_model_client_basic_completion(model: str, openai_client: OpenAIChatCompletionClient) -> None:
# Test basic completion
Expand All @@ -1639,7 +1639,7 @@ async def test_model_client_basic_completion(model: str, openai_client: OpenAICh
@pytest.mark.asyncio
@pytest.mark.parametrize(
"model",
["gpt-4o-mini", "gemini-1.5-flash", "claude-3-5-haiku-20241022"],
["gpt-4.1-nano", "gemini-1.5-flash", "claude-3-5-haiku-20241022"],
)
async def test_model_client_with_function_calling(model: str, openai_client: OpenAIChatCompletionClient) -> None:
# Test tool calling
Expand Down Expand Up @@ -1716,7 +1716,7 @@ async def test_model_client_with_function_calling(model: str, openai_client: Ope
@pytest.mark.asyncio
@pytest.mark.parametrize(
"model",
["gpt-4o-mini", "gemini-1.5-flash"],
["gpt-4.1-nano", "gemini-1.5-flash"],
)
async def test_openai_structured_output_using_response_format(
model: str, openai_client: OpenAIChatCompletionClient
Expand Down Expand Up @@ -1749,7 +1749,7 @@ class AgentResponse(BaseModel):
@pytest.mark.asyncio
@pytest.mark.parametrize(
"model",
["gpt-4o-mini", "gemini-1.5-flash"],
["gpt-4.1-nano", "gemini-1.5-flash"],
)
async def test_openai_structured_output(model: str, openai_client: OpenAIChatCompletionClient) -> None:
class AgentResponse(BaseModel):
Expand All @@ -1769,7 +1769,7 @@ class AgentResponse(BaseModel):
@pytest.mark.asyncio
@pytest.mark.parametrize(
"model",
["gpt-4o-mini", "gemini-1.5-flash"],
["gpt-4.1-nano", "gemini-1.5-flash"],
)
async def test_openai_structured_output_with_streaming(model: str, openai_client: OpenAIChatCompletionClient) -> None:
class AgentResponse(BaseModel):
Expand All @@ -1795,7 +1795,7 @@ class AgentResponse(BaseModel):
@pytest.mark.parametrize(
"model",
[
"gpt-4o-mini",
"gpt-4.1-nano",
# "gemini-1.5-flash", # Gemini models do not support structured output with tool calls from model client.
],
)
Expand Down Expand Up @@ -1853,7 +1853,7 @@ def sentiment_analysis(text: str) -> str:
@pytest.mark.parametrize(
"model",
[
"gpt-4o-mini",
"gpt-4.1-nano",
# "gemini-1.5-flash", # Gemini models do not support structured output with tool calls from model client.
],
)
Expand Down Expand Up @@ -2072,7 +2072,7 @@ async def test_add_name_prefixes(monkeypatch: pytest.MonkeyPatch) -> None:
@pytest.mark.parametrize(
"model",
[
"gpt-4o-mini",
"gpt-4.1-nano",
"gemini-1.5-flash",
"claude-3-5-haiku-20241022",
],
Expand Down Expand Up @@ -2188,7 +2188,7 @@ async def test_system_message_not_merged_for_multiple_system_messages_true() ->
mock_client = MagicMock()
client = BaseOpenAIChatCompletionClient(
client=mock_client,
create_args={"model": "gpt-4o"},
create_args={"model": "gpt-4.1-nano"},
model_info={
"vision": False,
"function_calling": False,
Expand Down Expand Up @@ -2355,7 +2355,7 @@ async def test_empty_assistant_content_with_gemini(model: str, openai_client: Op
@pytest.mark.parametrize(
"model",
[
"gpt-4o-mini",
"gpt-4.1-nano",
"gemini-1.5-flash",
"claude-3-5-haiku-20241022",
],
Expand Down Expand Up @@ -2402,7 +2402,7 @@ def get_regitered_transformer(client: OpenAIChatCompletionClient) -> Transformer
@pytest.mark.parametrize(
"model",
[
"gpt-4o-mini",
"gpt-4.1-nano",
],
)
async def test_openai_model_unknown_message_type(model: str, openai_client: OpenAIChatCompletionClient) -> None:
Expand Down
4 changes: 2 additions & 2 deletions python/packages/autogen-ext/tests/test_filesurfer_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ async def test_run_filesurfer(monkeypatch: pytest.MonkeyPatch) -> None:
</html>""")

# Mock the API calls
model = "gpt-4o-2024-05-13"
model = "gpt-4.1-nano-2025-04-14"
chat_completions = [
ChatCompletion(
id="id1",
Expand Down Expand Up @@ -153,7 +153,7 @@ async def test_run_filesurfer(monkeypatch: pytest.MonkeyPatch) -> None:
@pytest.mark.asyncio
async def test_file_surfer_serialization() -> None:
"""Test that FileSurfer can be serialized and deserialized properly."""
model = "gpt-4o-2024-05-13"
model = "gpt-4.1-nano-2025-04-14"
agent = FileSurfer(
"FileSurfer",
model_client=OpenAIChatCompletionClient(model=model, api_key=""),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def agent(client: AsyncOpenAI) -> OpenAIAssistantAgent:
return OpenAIAssistantAgent(
name="assistant",
instructions="Help the user with their task.",
model="gpt-4o-mini",
model="gpt-4.1-nano",
description="OpenAI Assistant Agent",
client=client,
tools=tools,
Expand Down Expand Up @@ -346,7 +346,7 @@ async def test_on_reset_behavior(client: AsyncOpenAI, cancellation_token: Cancel
agent = OpenAIAssistantAgent(
name="assistant",
instructions="Help the user with their task.",
model="gpt-4o-mini",
model="gpt-4.1-nano",
description="OpenAI Assistant Agent",
client=client,
thread_id=thread.id,
Expand Down
4 changes: 2 additions & 2 deletions python/packages/autogen-ext/tests/test_websurfer_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ async def mock_create(

@pytest.mark.asyncio
async def test_run_websurfer(monkeypatch: pytest.MonkeyPatch) -> None:
model = "gpt-4o-2024-05-13"
model = "gpt-4.1-nano-2025-04-14"
chat_completions = [
ChatCompletion(
id="id2",
Expand Down Expand Up @@ -149,7 +149,7 @@ async def test_run_websurfer(monkeypatch: pytest.MonkeyPatch) -> None:

@pytest.mark.asyncio
async def test_run_websurfer_declarative(monkeypatch: pytest.MonkeyPatch) -> None:
model = "gpt-4o-2024-05-13"
model = "gpt-4.1-nano-2025-04-14"
chat_completions = [
ChatCompletion(
id="id1",
Expand Down
4 changes: 2 additions & 2 deletions python/packages/autogen-studio/tests/test_db_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def sample_team(test_user: str) -> Team:
agent = AssistantAgent(
name="weather_agent",
model_client=OpenAIChatCompletionClient(
model="gpt-4",
model="gpt-4.1-nano",
),
)

Expand Down Expand Up @@ -187,4 +187,4 @@ def test_initialize_database_scenarios(self, tmp_path, monkeypatch):

finally:
asyncio.run(db.close())
db.reset_db()
db.reset_db()
4 changes: 2 additions & 2 deletions python/packages/autogen-studio/tests/test_team_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def sample_config():
agent = AssistantAgent(
name="weather_agent",
model_client=OpenAIChatCompletionClient(
model="gpt-4o-mini",
model="gpt-4.1-nano",
),
)

Expand Down Expand Up @@ -146,4 +146,4 @@ async def mock_run_stream(*args, **kwargs):

# Verify the last message is a TeamResult
assert isinstance(streamed_messages[-1], type(mock_messages[-1]))


Loading