-
Notifications
You must be signed in to change notification settings - Fork 5.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add math-class group chat test #309
Changes from 5 commits
c3944fd
39c9652
9a8ee74
4439a1e
a1993bf
066001e
50eaa34
25b4ffa
7f83554
5bbf8b2
02c52a3
d517e22
e12d5e4
f262a88
c8032f2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
import pytest | ||
import autogen | ||
from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST | ||
import random | ||
|
||
|
||
def test_func_call_groupchat(): | ||
|
@@ -49,6 +51,181 @@ def test_func_call_groupchat(): | |
agent2.initiate_chat(group_chat_manager, message={"function_call": {"name": "func", "arguments": '{"x": 1}'}}) | ||
|
||
|
||
def test_group_chat_math_class(): | ||
""" | ||
This test case is to simulate a math class. | ||
where teacher creates math questions and student resolves the questions. | ||
teacher will create a question, student will resolve the question and tell teacher the answer. | ||
If the answer is correct, teacher will create another question, otherwise, teacher will ask student to resolve the question again. | ||
The class will end when teacher has created 3 questions. | ||
|
||
This test case is created to test the following features: | ||
- speaker selection should work under a continuous q&a scenario among two agents and GPT 3.5 model. | ||
- admin should end the class when teacher has created 3 questions. | ||
""" | ||
skip_if_openai_not_available() | ||
config_list = autogen.config_list_from_json( | ||
OAI_CONFIG_LIST, | ||
file_location=KEY_LOC, | ||
filter_dict={ | ||
"model": ["gpt-3.5-turbo"], | ||
}, | ||
) | ||
gpt3_5_config = { | ||
"model": "gpt-3.5-turbo", | ||
"seed": random.randint(0, 100), # change the seed for different trials | ||
"temperature": 0, | ||
"config_list": config_list, | ||
"request_timeout": 120, | ||
} | ||
|
||
llm_config_for_user_proxy = { | ||
**gpt3_5_config, | ||
"functions": [ | ||
{ | ||
"name": "terminate_group_chat", | ||
"description": "terminate group chat", | ||
"parameters": { | ||
"type": "object", | ||
"properties": { | ||
"message": { | ||
"type": "string", | ||
"description": "terminate group chat message", | ||
}, | ||
}, | ||
"required": ["message"], | ||
}, | ||
} | ||
], | ||
} | ||
|
||
def terminate_group_chat(message): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @afourney This is how to achieve a more robust terminating strategy via function_call; could you review it? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @LittleLittleCloud I am having trouble understanding why this strategy is more robust. Suggested strategy: one of the agents calls a group chat termination function; the user proxy executes it; the manager detects the termination string. Old strategy: one of the agents generates a termination string; the manager detects it. If an agent is smart enough for the suggested strategy, then it should also be able to do the old strategy? Sorry if I missed the argument for the increased robustness. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Yes! Unfortunately, that's not the case in the real world, especially for […]. Even for […]. The new strategy (using termination_function_call) can make sure the group chat terminates correctly when that termination function_call gets triggered. That strategy also works well on gpt-3.5-turbo, which is fine-tuned for function_call. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Got it -- the fact that function calls are prioritized makes it more robust. I added this comment to #525 |
||
return f"[GROUPCHAT_TERMINATE] {message}" | ||
|
||
user_proxy = autogen.UserProxyAgent( | ||
name="Admin", | ||
system_message="You terminate group chat when teacher says [COMPLETE].", | ||
code_execution_config=False, | ||
llm_config=llm_config_for_user_proxy, | ||
human_input_mode="NEVER", | ||
function_map={"terminate_group_chat": terminate_group_chat}, | ||
) | ||
|
||
llm_config_for_teacher = { | ||
**gpt3_5_config, | ||
"functions": [ | ||
{ | ||
"name": "create_math_question", | ||
"description": "create pre-school math question for student to resolve", | ||
"parameters": { | ||
"type": "object", | ||
"properties": { | ||
"question": { | ||
"type": "string", | ||
"description": "pre-school math question", | ||
}, | ||
"i": { | ||
"type": "integer", | ||
"description": "question index", | ||
}, | ||
}, | ||
"required": ["question", "i"], | ||
}, | ||
} | ||
], | ||
} | ||
|
||
def create_math_question(question, i):
    """Format a teacher question as a tagged chat message.

    Args:
        question: Text of the math question.
        i: Index of the question.

    Returns:
        The string ``"[QUESTION] this is question #<i>: <question>"``. The
        ``[QUESTION]`` prefix is what the test's final assertions count.
    """
    return f"[QUESTION] this is question #{i}: {question}"
|
||
teacher = autogen.AssistantAgent( | ||
"teacher", | ||
system_message="""You are a pre-school math teacher, you create 3 math questions for student to resolve. | ||
Here's your workflow: | ||
-workflow- | ||
if question count > 3 say [COMPLETE]. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps it would be better to make the if-else grammar consistent in your code. |
||
else create_math_question | ||
if answer is correct: | ||
create_math_question | ||
else: | ||
ask student to resolve the question again | ||
""", | ||
llm_config=llm_config_for_teacher, | ||
function_map={"create_math_question": create_math_question}, | ||
) | ||
|
||
llm_config_for_student = { | ||
**gpt3_5_config, | ||
"functions": [ | ||
{ | ||
"name": "answer_math_question", | ||
"description": "answer math question from teacher", | ||
"parameters": { | ||
"type": "object", | ||
"properties": { | ||
"answer": { | ||
"type": "string", | ||
"description": "answer", | ||
}, | ||
}, | ||
"required": ["answer"], | ||
}, | ||
} | ||
], | ||
} | ||
|
||
def answer_math_question(answer):
    """Format a student answer as a tagged chat message.

    Args:
        answer: The student's answer text.

    Returns:
        The string ``"[ANSWER] <answer>"``. The ``[ANSWER]`` prefix is what
        the test's final assertions count.
    """
    return f"[ANSWER] {answer}"
|
||
student = autogen.AssistantAgent( | ||
"student", | ||
system_message="""You are a pre-school student, you resolve the math questions from teacher. | ||
Here's your workflow: | ||
-workflow- | ||
if question is received: | ||
call answer_math_question | ||
else: | ||
ask teacher to create a question | ||
""", | ||
llm_config=llm_config_for_student, | ||
function_map={"answer_math_question": answer_math_question}, | ||
) | ||
groupchat = autogen.GroupChat(agents=[user_proxy, student, teacher], messages=[], max_round=25) | ||
manager = autogen.GroupChatManager( | ||
groupchat=groupchat, | ||
llm_config=gpt3_5_config, | ||
is_termination_msg=lambda message: message.startswith("[GROUPCHAT_TERMINATE]"), | ||
) | ||
user_proxy.send( | ||
"welcome to the class. I'm admin here. Teacher, you create 3 math questions for student to answer. Let me know when student resolve all questions.", | ||
manager, | ||
) | ||
|
||
teacher.send("I'm teacher, I will create 3 math questions for student to answer.", manager) | ||
student.send("I'm student, I will answer teacher's questions.", manager) | ||
|
||
user_proxy.initiate_chat( | ||
manager, | ||
message="""teacher, please start""", | ||
) | ||
|
||
assert len(groupchat.messages) < 25 | ||
|
||
# verify if admin says [GROUPCHAT_TERMINATE] | ||
terminate_message = filter( | ||
lambda message: message["content"].startswith("[GROUPCHAT_TERMINATE]"), groupchat.messages | ||
) | ||
assert len(list(terminate_message)) == 1 | ||
|
||
# verify if teacher gives 3 questions | ||
question_message = filter(lambda message: message["content"].startswith("[QUESTION]"), groupchat.messages) | ||
assert len(list(question_message)) == 3 | ||
|
||
# verify if student gives more than 3 answers (student might give more than 3 answers if student's answer is not correct) | ||
answer_message = filter(lambda message: message["content"].startswith("[ANSWER]"), groupchat.messages) | ||
assert len(list(answer_message)) >= 3 | ||
|
||
|
||
def test_chat_manager(): | ||
agent1 = autogen.ConversableAgent( | ||
"alice", | ||
|
@@ -112,8 +289,16 @@ def test_plugin(): | |
assert len(groupchat.messages) == 2 | ||
|
||
|
||
def skip_if_openai_not_available():
    """Skip the calling test when the ``openai`` package cannot be imported.

    Calls ``pytest.skip`` with the message ``"OpenAI package not found."`` if
    importing ``openai`` raises ``ImportError``; otherwise returns ``None``.
    """
    try:
        import openai  # noqa: F401  (imported only to probe availability)
    except ImportError:
        pytest.skip("OpenAI package not found.")
|
||
|
||
# Script entry point: run only the math-class group chat test directly.
# The other tests stay commented out and can be re-enabled as needed.
if __name__ == "__main__":
    test_group_chat_math_class()
    # test_func_call_groupchat()
    # test_broadcast()
    # test_chat_manager()
    # test_plugin()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice. This is better than my old way. @rickyloynd-microsoft @thinkall @kevin666aa FYI.