From f6c2f9b4311a51b0abf0918f86b7d21c64e8a3b6 Mon Sep 17 00:00:00 2001
From: Adam Fourney <adamfo@microsoft.com>
Date: Mon, 13 Nov 2023 15:54:40 -0800
Subject: [PATCH] Allows users to specify a different requirements.txt file to
 install in Docker, to test other versions or branches of AutoGen. Closes #662

---
 samples/tools/testbed/README.md               |  6 ++-
 .../tools/testbed/includes/requirements.txt   |  1 +
 samples/tools/testbed/run_scenarios.py        | 39 ++++++++++++++-----
 3 files changed, 36 insertions(+), 10 deletions(-)
 create mode 100644 samples/tools/testbed/includes/requirements.txt

diff --git a/samples/tools/testbed/README.md b/samples/tools/testbed/README.md
index f947a0a5d011..532a6470f623 100644
--- a/samples/tools/testbed/README.md
+++ b/samples/tools/testbed/README.md
@@ -37,11 +37,15 @@ options:
   -h, --help    show this help message and exit
 
   -r REPEAT, --repeat REPEAT
-                The number of repetitions to run for each scenario (default: 10).
+                The number of repetitions to run for each scenario (default: 1).
 
   -c CONFIG, --config CONFIG
                 The environment variable name or path to the OAI_CONFIG_LIST (default: OAI_CONFIG_LIST).
 
+  --requirements REQUIREMENTS
+                The requirements file to pip install before running the scenario. This file must be found in
+                the 'includes' directory. (default: requirements.txt)
+
   --native      Run the scenarios natively rather than in docker.
                 NOTE: This is not advisable, and should be done with great caution.
 ```
diff --git a/samples/tools/testbed/includes/requirements.txt b/samples/tools/testbed/includes/requirements.txt
new file mode 100644
index 000000000000..46ad1e009ca1
--- /dev/null
+++ b/samples/tools/testbed/includes/requirements.txt
@@ -0,0 +1 @@
+pyautogen
diff --git a/samples/tools/testbed/run_scenarios.py b/samples/tools/testbed/run_scenarios.py
index 335a12798546..3508e6b437f5 100644
--- a/samples/tools/testbed/run_scenarios.py
+++ b/samples/tools/testbed/run_scenarios.py
@@ -13,7 +13,7 @@
 INCLUDES_DIR = "includes"
 
 
-def run_scenarios(scenario, n_repeats, is_native, config_list, results_dir="results"):
+def run_scenarios(scenario, n_repeats, is_native, config_list, requirements, results_dir="results"):
     """
     Run a set testbed scenarios a given number of times.
 
@@ -107,7 +107,7 @@ def run_scenarios(scenario, n_repeats, is_native, config_list, results_dir="resu
                     if is_native:
                         run_scenario_natively(results_repetition)
                     else:
-                        run_scenario_in_docker(results_repetition)
+                        run_scenario_in_docker(results_repetition, requirements)
 
 
 def expand_scenario(scenario_dir, scenario, output_file):
@@ -161,7 +161,7 @@ def run_scenario_natively(work_dir):
     return
 
 
-def run_scenario_in_docker(work_dir, timeout=600):
+def run_scenario_in_docker(work_dir, requirements, timeout=600):
     """
     Run a scenario in a Docker environment.
 
@@ -188,9 +188,9 @@ def run_scenario_in_docker(work_dir, timeout=600):
     # Prepare the run script
     with open(os.path.join(work_dir, "run.sh"), "wt") as f:
         f.write(
-            """#
+            f"""#
 . ./ENV
-pip install pyautogen
+pip install -r {requirements}
 python scenario.py
 rm ENV
 echo SCENARIO COMPLETE !#!#
@@ -257,7 +257,15 @@ def run_scenario_in_docker(work_dir, timeout=600):
         default="OAI_CONFIG_LIST",
     )
     parser.add_argument(
-        "-r", "--repeat", type=int, help="The number of repetitions to run for each scenario (default: 10).", default=10
+        "-r", "--repeat", type=int, help="The number of repetitions to run for each scenario (default: 1).", default=1
+    )
+    parser.add_argument(
+        "--requirements",
+        type=str,
+        help="The requirements file to pip install before running the scenario. This file must be found in the '"
+        + INCLUDES_DIR
+        + "' directory. (default: requirements.txt)",
+        default=None,
     )
     parser.add_argument(
         "--native",
@@ -274,18 +282,31 @@ def run_scenario_in_docker(work_dir, timeout=600):
 
     # Warn if running natively
     if args.native:
+        if args.requirements is not None:
+            sys.exit("--requirements is not compatible with --native. Exiting.")
+
         choice = input(
             'WARNING: Running natively, without Docker, not only poses the usual risks of executing arbitrary AI generated code on your machine, it also makes it impossible to ensure that each test starts from a known and consistent set of initial conditions. For example, if the agents spend time debugging and installing Python libraries to solve the task, then those libraries will be available to all other runs. In other words, earlier runs can influence later runs, leading to many confounds in testing.\n\nAre you absolutely sure you want to continue with native execution? Type "Yes" exactly, and in full, to proceed: '
         )
 
         if choice.strip().lower() != "yes":
-            print("Received '" + choice + "'. Exiting.")
+            sys.exit("Received '" + choice + "'. Exiting.")
+
+    # What requirements file are we working with?
+    requirements = "requirements.txt"
+    if args.requirements is not None:
+        requirements = args.requirements
 
-    # Import docker if needed
     is_native = True if args.native else False
     if not is_native:
+        # Import docker
         import docker
 
+        # Make sure the requirements file exists
+        req_file = os.path.join(INCLUDES_DIR, requirements)
+        if not os.path.isfile(req_file):
+            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), req_file)
+
     # Warn aboit a common error
     env_file = os.path.join(INCLUDES_DIR, "ENV")
     example_file = os.path.join(INCLUDES_DIR, "ENV.example")
@@ -295,4 +316,4 @@ def run_scenario_in_docker(work_dir, timeout=600):
             f"The environment file '{env_file}' does not exist (perhaps this is your first time setting up the testbed). A default environment file has been provided, but you may want to edit it to include your API keys and configurations.\n"
         )
 
-    run_scenarios(args.scenario, args.repeat, is_native, config_list)
+    run_scenarios(args.scenario, args.repeat, is_native, config_list, requirements)