Merge branch 'main' into async-test
gagb authored Dec 17, 2023
2 parents 25eb730 + 083f522 commit ebf5811
Showing 36 changed files with 1,182 additions and 95 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -119,6 +119,7 @@ After the repo is cloned.
The figure below shows an example conversation flow with AutoGen.
![Agent Chat Example](https://github.com/microsoft/autogen/blob/main/website/static/img/chat_example.png)

Alternatively, the [sample code](https://github.com/microsoft/autogen/blob/main/samples/simple_chat.py) here allows a user to chat with an AutoGen agent in ChatGPT style.
Please find more [code examples](https://microsoft.github.io/autogen/docs/Examples#automated-multi-agent-chat) for this feature.

## Enhanced LLM Inferences
2 changes: 1 addition & 1 deletion samples/apps/autogen-assistant/README.md
@@ -8,7 +8,7 @@ AutoGen Assistant is an Autogen-powered AI app (user interface) that can convers

Some of the capabilities supported by the app frontend include the following:

- [x] Select fron a list of agents (current support for two agent workflows - `UserProxyAgent` and `AssistantAgent`)
- [x] Select from a list of agents (current support for two agent workflows - `UserProxyAgent` and `AssistantAgent`)
- [x] Modify agent configuration (e.g. temperature, model, agent system message) and chat with updated agent configurations.
- [x] View agent messages and output files in the UI from agent runs.
- [ ] Support for more complex agent workflows (e.g. `GroupChat` workflows)
22 changes: 22 additions & 0 deletions samples/simple_chat.py
@@ -0,0 +1,22 @@
from autogen import UserProxyAgent, ConversableAgent, config_list_from_json


def main():
    # Load LLM inference endpoints from an env variable or a file
    # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
    # and OAI_CONFIG_LIST_sample.
    # For example, if you have created an OAI_CONFIG_LIST file in the current working directory, that file will be used.
    config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST")

    # Create the agent that uses the LLM.
    assistant = ConversableAgent("agent", llm_config={"config_list": config_list})

    # Create the agent that represents the user in the conversation.
    user_proxy = UserProxyAgent("user", code_execution_config=False)

    # Let the assistant start the conversation. It will end when the user types exit.
    assistant.initiate_chat(user_proxy, message="How can I help you today?")


if __name__ == "__main__":
    main()
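The lookup order described in the comments above (environment variable first, then a file of that name) can be sketched as follows. This is only an illustration of the documented behavior, not autogen's actual implementation, and `load_config_list` is a hypothetical name:

```python
import json
import os


def load_config_list(env_or_file: str):
    """Return a config list from an env variable holding JSON, else from a file of that name."""
    raw = os.environ.get(env_or_file)
    if raw is not None:
        # The environment variable holds the JSON content itself
        return json.loads(raw)
    # Otherwise treat the name as a path to a JSON config file
    with open(env_or_file) as f:
        return json.load(f)
```

The real `config_list_from_json` additionally supports filtering by model and other fields; see the linked FAQ for details.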
16 changes: 16 additions & 0 deletions samples/tools/testbed/Dockerfile
@@ -0,0 +1,16 @@
# Base image for running AutoGen Testbed scenarios
FROM python:3.11
LABEL maintainer="AutoGen"

# Upgrade pip
RUN pip install --upgrade pip

# Set the image to the Pacific Timezone
RUN ln -snf /usr/share/zoneinfo/US/Pacific /etc/localtime && echo "US/Pacific" > /etc/timezone

# Pre-load autogen dependencies, but not autogen itself, since we'll often want to install the latest from source
RUN pip install "pyautogen[teachable,lmm,graphs]"
RUN pip uninstall --yes pyautogen

# Pre-load popular packages as per https://learnpython.com/blog/most-popular-python-packages/
RUN pip install numpy pandas matplotlib seaborn scikit-learn requests urllib3 nltk pillow pytest
21 changes: 21 additions & 0 deletions samples/tools/testbed/README.md
@@ -46,6 +46,10 @@ options:
The requirements file to pip install before running the scenario. This file must be found in
the 'includes' directory. (default: requirements.txt)
-d DOCKER_IMAGE, --docker-image DOCKER_IMAGE
The Docker image to use when running scenarios. Cannot be used together with --native.
(default: 'autogen/testbed:default', which will be created if not present)
--native Run the scenarios natively rather than in docker.
NOTE: This is not advisable, and should be done with great caution.
```
@@ -216,3 +220,20 @@ python ./run_scenarios.py ./scenarios/GAIA/gaia_validation_level_1__two_agents_g
# Compute Metrics
python utils/collate_gaia_csv.py ./results/gaia_validation_level_1__two_agents_gpt4 | python utils/metrics_gaia.py
```

## (Example) Running tasks from AutoGPT

The Testbed supports running tasks proposed in the [AutoGPT benchmark](https://github.com/Significant-Gravitas/AutoGPT/tree/master/benchmark/agbenchmark/challenges). In this scenario, the agents are prompted to handle a diverse range of tasks, including coding, question answering, and web scraping. As with the HumanEval scenarios, the agents can call a unit test script to check whether the task was completed successfully.

Running this scenario type requires converting the tasks, running the Testbed, collating the results, and computing the metrics. The following commands run each test instance with GPT-4:

```
# Convert tasks
python utils/prepare_autogpt.py
# Run all the scenarios with GPT-4
python run_scenarios.py scenarios/AutoGPT/autogpt_twoagent_gpt4.jsonl
# Compute metrics (the metrics script is shared with HumanEval)
python utils/collate_autogpt.py ./results/autogpt_twoagent_gpt4 | python metrics_human_eval.py
```
4 changes: 4 additions & 0 deletions samples/tools/testbed/includes/requirements.txt
@@ -1 +1,5 @@
git+https://github.com/microsoft/autogen.git
pandas
beautifulsoup4
requests
pytest
69 changes: 53 additions & 16 deletions samples/tools/testbed/run_scenarios.py
@@ -15,8 +15,12 @@
# Location of the global includes dir. The contents of this directory will be copied to the Docker environment.
GLOBAL_INCLUDES_DIR = "includes"

# This is the tag given to the image that is *built* when no other image is provided.
# Do not use this field to specify the name of an existing image (e.g., on Dockerhub)
DEFAULT_DOCKER_IMAGE_TAG = "autogen/testbed:default"

def run_scenarios(scenario, n_repeats, is_native, config_list, requirements, results_dir="results"):

def run_scenarios(scenario, n_repeats, is_native, config_list, requirements, docker_image=None, results_dir="results"):
"""
Run a set of testbed scenarios a given number of times.
@@ -103,7 +107,7 @@ def run_scenarios(scenario, n_repeats, is_native, config_list, requirements, res
if is_native:
run_scenario_natively(results_repetition)
else:
run_scenario_in_docker(results_repetition, requirements)
run_scenario_in_docker(results_repetition, requirements, docker_image=docker_image)


def expand_scenario(scenario_dir, scenario, output_dir):
@@ -244,7 +248,7 @@ def run_scenario_natively(work_dir):
return


def run_scenario_in_docker(work_dir, requirements, timeout=600):
def run_scenario_in_docker(work_dir, requirements, timeout=600, docker_image=None):
"""
Run a scenario in a Docker environment.
@@ -253,20 +257,34 @@ def run_scenario_in_docker(work_dir, requirements, timeout=600):
timeout (Optional, int): the number of seconds to allow a Docker container to run before timing out
"""

# Create a docker client
client = docker.from_env()
image_name = "python:3.11"

# Pull a suitable image
try:
image = client.images.get(image_name)
except docker.errors.ImageNotFound:
# pull the image
print("Pulling image", image_name)
image = None

# If the docker_image is None, then we will fetch DEFAULT_DOCKER_IMAGE_TAG, if present,
# or build it if missing.
if docker_image is None:
# Pull a suitable image
try:
image = client.images.get(DEFAULT_DOCKER_IMAGE_TAG)
except docker.errors.ImageNotFound:
print(f"Building default Docker image '{DEFAULT_DOCKER_IMAGE_TAG}'. This may take a few minutes...")
try:
build_default_docker_image(client, DEFAULT_DOCKER_IMAGE_TAG)
image = client.images.get(DEFAULT_DOCKER_IMAGE_TAG)
except docker.errors.DockerException:
print(f"Failed to build image '{DEFAULT_DOCKER_IMAGE_TAG}'")

# Otherwise get the requested image
else:
try:
image = client.images.pull(image_name)
except docker.errors.DockerException:
print("Failed to pull image", image_name)
image = client.images.get(docker_image)
except docker.errors.ImageNotFound:
# pull the image
print(f"Pulling image '{docker_image}'")
try:
image = client.images.pull(docker_image)
except docker.errors.DockerException:
print(f"Failed to pull image '{docker_image}'")

# Prepare the run script
with open(os.path.join(work_dir, "run.sh"), "wt", newline="\n") as f:
@@ -351,6 +369,12 @@ def run_scenario_in_docker(work_dir, requirements, timeout=600):
f.write(logs)


def build_default_docker_image(docker_client, image_tag):
    for segment in docker_client.api.build(path=".", dockerfile="Dockerfile", rm=True, tag=image_tag, decode=True):
        if "stream" in segment:
            sys.stdout.write(segment["stream"])


###############################################################################
if __name__ == "__main__":
script_name = os.path.basename(__file__)
@@ -382,6 +406,15 @@ def run_scenario_in_docker(work_dir, requirements, timeout=600):
+ "' directory. (default: requirements.txt)",
default=None,
)
    parser.add_argument(
        "-d",
        "--docker-image",
        type=str,
        help="The Docker image to use when running scenarios. Cannot be used together with --native. (default: '"
        + DEFAULT_DOCKER_IMAGE_TAG
        + "', which will be created if not present)",
        default=None,
    )
    parser.add_argument(
        "--native",
        action="store_true",
@@ -395,6 +428,10 @@ def run_scenario_in_docker(work_dir, requirements, timeout=600):
if len(config_list) == 0:
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), args.config)

    # Don't allow both --docker-image and --native on the same command
    if args.docker_image is not None and args.native:
        sys.exit("The options --native and --docker-image cannot be used together. Exiting.")

# Warn if running natively
if args.native:
if IS_WIN32:
@@ -434,4 +471,4 @@ def run_scenario_in_docker(work_dir, requirements, timeout=600):
f"The environment file '{env_file}' does not exist (perhaps this is your first time setting up the testbed). A default environment file has been provided, but you may want to edit it to include your API keys and configurations.\n"
)

run_scenarios(args.scenario, args.repeat, is_native, config_list, requirements)
run_scenarios(args.scenario, args.repeat, is_native, config_list, requirements, docker_image=args.docker_image)
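As a design note, the explicit `sys.exit` check for conflicting options could also be expressed with argparse's built-in mutual exclusion, which rejects the combination at parse time. A minimal standalone sketch (not the script's actual parser):

```python
import argparse

parser = argparse.ArgumentParser()
group = parser.add_mutually_exclusive_group()
group.add_argument("-d", "--docker-image", type=str, default=None)
group.add_argument("--native", action="store_true")

# Supplying both options makes parse_args() error out with SystemExit
args = parser.parse_args(["--docker-image", "my/image:latest"])
```

With this approach the manual post-parse check becomes unnecessary, at the cost of argparse's generic error message.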
2 changes: 1 addition & 1 deletion samples/tools/testbed/scenarios/AutoGPT/README.md
@@ -1,3 +1,3 @@
The AutoGPT-style tasks are contained in the folder `challenges`.

Run `python utils/prepare_data.py` to convert the tasks to a jsonl format compatible with evaluation.
Run `python ../../utils/prepare_autogpt.py` to convert the tasks to a jsonl format compatible with evaluation.
@@ -3,6 +3,7 @@
import os
import subprocess
import sys
import shutil


def scoring(content: str, should_contain: list, should_not_contain: list):
@@ -28,7 +29,6 @@ def scoring(content: str, should_contain: list, should_not_contain: list):


def check():
workspace = "coding"
files_contents = []
scores = []

@@ -54,9 +54,11 @@ def check():

for file_path in matching_files:
if eval_type == "python":
# copy the test file to working directory
shutil.copy(f"../custom_python/{file_path}", "./")
result = subprocess.run(
[sys.executable, file_path],
cwd=os.path.abspath(workspace),
cwd=os.path.abspath("./"),
capture_output=True,
text=True,
)
@@ -24,24 +24,24 @@
"work_dir": work_dir,
"use_docker": False,
},
max_consecutive_auto_reply=10,
max_consecutive_auto_reply=5,
# default_auto_reply="TERMINATE",
)

if target_folder:
# The task involves reading from a file and then doing something with it.
message = """
Your task is to: __TASK__ The file you needed is located in this directory: '__TARGET_FOLDER__'. You should save the output files in this directory: './'
Use the following command to check if all the unit tests have passed:
Here is the task description: __TASK__ The file you needed is located in this directory: '__TARGET_FOLDER__'. You should save the output files in the current directory: './'
Run the following command to check if all the unit tests have passed:
```bash
python ../check.py
```
You should refine the code and results until all the tests have passed.
"""
else:
message = """
Your task is to: __TASK__
Use the following command to check if all the unit tests have passed:
Here is the task description: __TASK__
Run the following command to check if all the unit tests have passed:
```bash
python ../check.py
```
@@ -0,0 +1,25 @@
import unittest

import password_generator


class TestPasswordGenerator(unittest.TestCase):
    def test_password_length(self):
        for i in range(8, 17):
            password = password_generator.generate_password(i)
            self.assertEqual(len(password), i)

    def test_value_error(self):
        with self.assertRaises(ValueError):
            password_generator.generate_password(7)
        with self.assertRaises(ValueError):
            password_generator.generate_password(17)

    def test_password_content(self):
        password = password_generator.generate_password()
        self.assertTrue(any(c.isdigit() for c in password))
        self.assertTrue(any(c in password_generator.string.punctuation for c in password))


if __name__ == "__main__":
    unittest.main()
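For reference, a minimal `password_generator.py` that would satisfy the unit tests above might look like the following. This is a sketch, not the benchmark's reference solution; note that the tests assume the module exposes `string` as an attribute, hence the module-level import:

```python
import random
import string


def generate_password(length: int = 8) -> str:
    """Generate a random password of 8-16 characters with a letter, a digit, and a symbol."""
    if not 8 <= length <= 16:
        raise ValueError("length must be between 8 and 16")
    # Guarantee at least one character from each required class
    chars = [
        random.choice(string.ascii_letters),
        random.choice(string.digits),
        random.choice(string.punctuation),
    ]
    # Fill the remainder from the full pool, then shuffle so the
    # guaranteed characters are not always in the same positions
    pool = string.ascii_letters + string.digits + string.punctuation
    chars += [random.choice(pool) for _ in range(length - len(chars))]
    random.shuffle(chars)
    return "".join(chars)
```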
@@ -0,0 +1,28 @@
{
  "category": [
    "coding"
  ],
  "cutoff": 90,
  "dependencies": [
    "TestThreeSum"
  ],
  "eval_id": "ac75c471-e0ce-400c-ba9a-fb72aaab444f",
  "ground": {
    "answer": "password_generator.py is created and satisfies the requirements.",
    "eval": {
      "type": "python"
    },
    "files": [
      "test_pwd.py"
    ],
    "should_contain": [],
    "should_not_contain": []
  },
  "info": {
    "description": "Tests if the agent can create a random password generator.",
    "difficulty": "basic",
    "side_effects": []
  },
  "name": "PasswordGenerator",
  "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain at least one letter, number and symbol. The password should be printed to the console. If no length is specified, the password should be 8 characters long. The password_generator should be imported as a module and called as password = password_generator.generate_password(length=x). Any invalid input should raise a ValueError."
}
@@ -0,0 +1,41 @@
import os
import subprocess
import tempfile
import unittest


class TestOrganizeFiles(unittest.TestCase):
    def setUp(self):
        # Create temporary directory
        self.test_dir = tempfile.mkdtemp()

        # File types and their corresponding directory
        self.file_types = {
            "test_image.png": "images",
            "test_doc.txt": "documents",
            "test_audio.mp3": "audio",
        }

        # Create test files
        for file_name in self.file_types.keys():
            open(os.path.join(self.test_dir, file_name), "a").close()

    def test_organize_files(self):
        # Call the organize_files.py script using subprocess
        subprocess.call(["python", "organize_files.py", "--directory_path=" + self.test_dir])

        # Check if the files have been moved to the correct directories
        for file_name, directory in self.file_types.items():
            self.assertTrue(os.path.isfile(os.path.join(self.test_dir, directory, file_name)))

    def tearDown(self):
        # Delete test directory and its contents
        for file_name, directory in self.file_types.items():
            os.remove(os.path.join(self.test_dir, directory, file_name))
        for directory in set(self.file_types.values()):
            os.rmdir(os.path.join(self.test_dir, directory))
        os.rmdir(self.test_dir)


if __name__ == "__main__":
    unittest.main()
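A minimal `organize_files.py` consistent with this test might look like the following sketch. The extension-to-directory mapping is an assumption (the test only checks `images`, `documents`, and `audio`), and `organize`/`main` are hypothetical names:

```python
import argparse
import os
import shutil

# Hypothetical mapping; only the three directories the test checks are required
EXTENSION_DIRS = {
    ".png": "images",
    ".jpg": "images",
    ".txt": "documents",
    ".mp3": "audio",
}


def organize(directory_path: str) -> None:
    """Move each file in directory_path into a subdirectory based on its extension."""
    for name in os.listdir(directory_path):
        src = os.path.join(directory_path, name)
        if not os.path.isfile(src):
            continue
        target = EXTENSION_DIRS.get(os.path.splitext(name)[1].lower())
        if target is None:
            continue  # leave unrecognized file types in place
        dest_dir = os.path.join(directory_path, target)
        os.makedirs(dest_dir, exist_ok=True)
        shutil.move(src, os.path.join(dest_dir, name))


def main(argv=None):
    parser = argparse.ArgumentParser()
    parser.add_argument("--directory_path", required=True)
    organize(parser.parse_args(argv).directory_path)
```

Invoked as a script with `--directory_path`, this matches how the test's `subprocess.call` drives it.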