Commit f9388aa

Merge branch 'master' into test-add-first-chat-test
kinance authored May 31, 2023
2 parents 8c6c30c + 63b79a8 commit f9388aa
Showing 30 changed files with 522 additions and 97 deletions.
1 change: 1 addition & 0 deletions .env.template
@@ -71,6 +71,7 @@
OPENAI_API_KEY=your-openai-api-key
# TEMPERATURE=0
# USE_AZURE=False
# OPENAI_ORGANIZATION=your-openai-organization-key-if-applicable

### AZURE
# moved to `azure.yaml.template`
25 changes: 18 additions & 7 deletions .github/workflows/ci.yml
@@ -5,6 +5,7 @@ on:
branches: [ master, ci-test*]
paths-ignore:
- 'tests/Auto-GPT-test-cassettes'
- 'tests/integration/challenges/current_score.json'
pull_request_target:
branches: [ master, stable , ci-test*]

@@ -119,6 +120,7 @@ jobs:
- name: Run pytest tests with coverage
run: |
pytest -n auto --cov=autogpt --cov-report term-missing --cov-branch --cov-report xml --cov-report term
python tests/integration/challenges/utils/build_current_score.py
env:
CI: true
PROXY: ${{ secrets.PROXY }}
@@ -131,11 +133,20 @@ jobs:
- name: Update cassette submodule to push target if push event
if: ${{ github.event_name == 'push' }}
run: |
cd tests/Auto-GPT-test-cassettes
current_branch=$(echo ${{ github.ref }} | sed -e "s/refs\/heads\///g")
git fetch origin $current_branch
git config --global user.name "Auto-GPT-Bot"
git config --global user.email "[email protected]"
git add tests/integration/challenges/current_score.json
if ! git diff-index --quiet HEAD; then
git commit -m "Update current score"
git push origin HEAD:refs/heads/$current_branch
else
echo "The current score didn't change."
fi
cd tests/Auto-GPT-test-cassettes
git fetch origin $current_branch
git add .
# Check if there are any changes
@@ -150,7 +161,7 @@ jobs:
git commit -m "Update submodule reference"
git push origin HEAD:refs/heads/$current_branch
else
echo "No changes to commit"
echo "No cassettes changes to commit"
exit 0
fi
@@ -182,7 +193,7 @@ jobs:
echo "DIFF_EXISTS=false" >> $GITHUB_ENV
fi

- name: Apply or remove prompt change label and comment
- name: Apply or remove behaviour change label and comment
if: ${{ github.event_name == 'pull_request_target' }}
run: |
PR_NUMBER=${{ github.event.pull_request.number }}
@@ -195,14 +206,14 @@
-H "Authorization: Bearer $TOKEN" \
-H "Accept: application/vnd.github.v3+json" \
https://api.github.com/repos/$REPO/issues/$PR_NUMBER/labels \
-d '{"labels":["prompt change"]}'
-d '{"labels":["behaviour change"]}'
echo $TOKEN | gh auth login --with-token
gh api repos/$REPO/issues/$PR_NUMBER/comments -X POST -F body="You changed AutoGPT's prompt. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged."
gh api repos/$REPO/issues/$PR_NUMBER/comments -X POST -F body="You changed AutoGPT's behaviour. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged."
else
echo "Removing label..."
curl -X DELETE \
-H "Authorization: Bearer $TOKEN" \
-H "Accept: application/vnd.github.v3+json" \
https://api.github.com/repos/$REPO/issues/$PR_NUMBER/labels/prompt%20change
https://api.github.com/repos/$REPO/issues/$PR_NUMBER/labels/behaviour%20change
fi
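
For reference, the two curl calls in this step map onto GitHub's standard REST label endpoints. A minimal Python sketch of the same add/remove logic — `REPO`, `PR_NUMBER`, and `TOKEN` are hypothetical placeholders here; in CI they come from the workflow context and secrets, and the workflow itself uses curl and the gh CLI:

```
import requests

# Hypothetical values -- in CI these come from the workflow context/secrets.
REPO = "owner/repo"
PR_NUMBER = 1234
TOKEN = "ghp_..."

HEADERS = {
    "Authorization": f"Bearer {TOKEN}",
    "Accept": "application/vnd.github.v3+json",
}
BASE = f"https://api.github.com/repos/{REPO}/issues/{PR_NUMBER}"


def sync_behaviour_change_label(diff_exists: bool) -> None:
    """Mirror the CI step: label the PR when cassettes changed, unlabel otherwise."""
    if diff_exists:
        # POST /repos/{owner}/{repo}/issues/{issue_number}/labels
        requests.post(
            f"{BASE}/labels",
            headers=HEADERS,
            json={"labels": ["behaviour change"]},
        )
    else:
        # DELETE /repos/{owner}/{repo}/issues/{issue_number}/labels/{name}
        # (returns 404 if the label was never applied, just like the curl call)
        requests.delete(f"{BASE}/labels/behaviour%20change", headers=HEADERS)
```
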
3 changes: 3 additions & 0 deletions .github/workflows/docker-ci.yml
@@ -3,6 +3,9 @@ name: Docker CI
on:
push:
branches: [ master ]
paths-ignore:
- 'tests/Auto-GPT-test-cassettes'
- 'tests/integration/challenges/current_score.json'
pull_request:
branches: [ master, stable ]

3 changes: 3 additions & 0 deletions .github/workflows/pr-label.yml
@@ -4,6 +4,9 @@ on:
# So that PRs touching the same files as the push are updated
push:
branches: [ master ]
paths-ignore:
- 'tests/Auto-GPT-test-cassettes'
- 'tests/integration/challenges/current_score.json'
# So that the `dirtyLabel` is removed if conflicts are resolved
# We recommend `pull_request_target` so that github secrets are available.
# In `pull_request` we wouldn't be able to change labels of fork PRs
6 changes: 6 additions & 0 deletions BULLETIN.md
@@ -51,3 +51,9 @@ memory store was also temporarily removed but we aim to merge a new implementation
before the next release.
Whether built-in support for the others will be added back in the future is subject to
discussion; feel free to pitch in: https://github.com/Significant-Gravitas/Auto-GPT/discussions/4280

# Challenge Workflow 🏆
If you have been working on challenges... Thank you!
To run the debugger challenge, or any other challenge that uses cassettes and VCR, in Docker, you will now need to run `pip uninstall vcrpy` followed by `pip install -r requirements.txt` again.
This installs a new version of vcrpy that is compatible with running VCR in Docker.
This extra step will no longer be needed once the VCRpy maintainer merges our changes.
4 changes: 3 additions & 1 deletion autogpt/agent/agent.py
@@ -128,11 +128,13 @@ def signal_handler(signum, frame):
# Send message to AI, get response
with Spinner("Thinking... ", plain_output=cfg.plain_output):
assistant_reply = chat_with_ai(
cfg,
self,
self.system_prompt,
self.triggering_prompt,
cfg.fast_token_limit,
) # TODO: This hardcodes the model to use GPT3.5. Make this an argument
cfg.fast_llm_model,
)

assistant_reply_json = fix_json_using_multiple_techniques(assistant_reply)
for plugin in cfg.plugins:
4 changes: 4 additions & 0 deletions autogpt/config/config.py
@@ -64,6 +64,7 @@ def __init__(self) -> None:
)

self.openai_api_key = os.getenv("OPENAI_API_KEY")
self.openai_organization = os.getenv("OPENAI_ORGANIZATION")
self.temperature = float(os.getenv("TEMPERATURE", "0"))
self.use_azure = os.getenv("USE_AZURE") == "True"
self.execute_local_commands = (
@@ -79,6 +80,9 @@ def __init__(self) -> None:
openai.api_base = self.openai_api_base
openai.api_version = self.openai_api_version

if self.openai_organization is not None:
openai.organization = self.openai_organization

self.elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
self.elevenlabs_voice_1_id = os.getenv("ELEVENLABS_VOICE_1_ID")
self.elevenlabs_voice_2_id = os.getenv("ELEVENLABS_VOICE_2_ID")
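
In isolation, the new `OPENAI_ORGANIZATION` handling amounts to the following — a minimal sketch, assuming the module-level `openai` attributes this codebase already relies on for the Azure settings:

```
import os

import openai

# Mirror of the Config.__init__ addition: only set the organization
# header when the environment variable is actually present.
openai_organization = os.getenv("OPENAI_ORGANIZATION")
if openai_organization is not None:
    openai.organization = openai_organization
```
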
26 changes: 20 additions & 6 deletions autogpt/configurator.py
@@ -14,6 +14,9 @@
if TYPE_CHECKING:
from autogpt.config import Config

GPT_4_MODEL = "gpt-4"
GPT_3_MODEL = "gpt-3.5-turbo"


def create_config(
config: Config,
@@ -51,8 +54,6 @@ def create_config(
config.set_debug_mode(False)
config.set_continuous_mode(False)
config.set_speak_mode(False)
config.set_fast_llm_model(check_model(config.fast_llm_model, "fast_llm_model"))
config.set_smart_llm_model(check_model(config.smart_llm_model, "smart_llm_model"))

if debug:
logger.typewriter_log("Debug Mode: ", Fore.GREEN, "ENABLED")
@@ -83,13 +84,26 @@ def create_config(
logger.typewriter_log("Speak Mode: ", Fore.GREEN, "ENABLED")
config.set_speak_mode(True)

# Set the default LLM models
if gpt3only:
logger.typewriter_log("GPT3.5 Only Mode: ", Fore.GREEN, "ENABLED")
config.set_smart_llm_model(config.fast_llm_model)

if gpt4only:
# --gpt3only should always use gpt-3.5-turbo, despite user's FAST_LLM_MODEL config
config.set_fast_llm_model(GPT_3_MODEL)
config.set_smart_llm_model(GPT_3_MODEL)

elif (
gpt4only
and check_model(GPT_4_MODEL, model_type="smart_llm_model") == GPT_4_MODEL
):
logger.typewriter_log("GPT4 Only Mode: ", Fore.GREEN, "ENABLED")
config.set_fast_llm_model(config.smart_llm_model)
# --gpt4only should always use gpt-4, despite user's SMART_LLM_MODEL config
config.set_fast_llm_model(GPT_4_MODEL)
config.set_smart_llm_model(GPT_4_MODEL)
else:
config.set_fast_llm_model(check_model(config.fast_llm_model, "fast_llm_model"))
config.set_smart_llm_model(
check_model(config.smart_llm_model, "smart_llm_model")
)

if memory_type:
supported_memory = get_supported_memory_backends()
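
The resulting precedence: `--gpt3only` wins unconditionally (no API check needed, since every key can use gpt-3.5-turbo), `--gpt4only` applies only when `check_model` confirms gpt-4 access, and otherwise both user-configured models are validated. A standalone sketch of that flow, with `check_model` stubbed out (the real helper queries the API and falls back to gpt-3.5-turbo):

```
GPT_4_MODEL = "gpt-4"
GPT_3_MODEL = "gpt-3.5-turbo"


def check_model(model: str, model_type: str) -> str:
    # Stub: the real helper returns `model` if the API key can access it,
    # otherwise it warns and falls back to gpt-3.5-turbo.
    return model


def select_models(config, gpt3only: bool, gpt4only: bool) -> None:
    if gpt3only:
        # --gpt3only always uses gpt-3.5-turbo, despite FAST_LLM_MODEL config
        config.set_fast_llm_model(GPT_3_MODEL)
        config.set_smart_llm_model(GPT_3_MODEL)
    elif gpt4only and check_model(GPT_4_MODEL, "smart_llm_model") == GPT_4_MODEL:
        # --gpt4only always uses gpt-4, despite SMART_LLM_MODEL config
        config.set_fast_llm_model(GPT_4_MODEL)
        config.set_smart_llm_model(GPT_4_MODEL)
    else:
        # Default: validate whatever the user configured
        config.set_fast_llm_model(check_model(config.fast_llm_model, "fast_llm_model"))
        config.set_smart_llm_model(check_model(config.smart_llm_model, "smart_llm_model"))
```
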
16 changes: 10 additions & 6 deletions autogpt/llm/chat.py
@@ -13,29 +13,34 @@
from autogpt.log_cycle.log_cycle import CURRENT_CONTEXT_FILE_NAME
from autogpt.logs import logger

cfg = Config()


# TODO: Change debug from hardcode to argument
def chat_with_ai(
config: Config,
agent: Agent,
system_prompt: str,
user_input: str,
token_limit: int,
model: str | None = None,
):
"""
Interact with the OpenAI API, sending the prompt, user input,
message history, and permanent memory.
Args:
config (Config): The config to use.
agent (Agent): The agent to use.
system_prompt (str): The prompt explaining the rules to the AI.
user_input (str): The input from the user.
token_limit (int): The maximum number of tokens allowed in the API call.
model (str, optional): The model to use. If None, the config.fast_llm_model will be used. Defaults to None.
Returns:
str: The AI's response.
"""
model = cfg.fast_llm_model # TODO: Change model from hardcode to argument
if model is None:
model = config.fast_llm_model

# Reserve 1000 tokens for the response
logger.debug(f"Token limit: {token_limit}")
send_token_limit = token_limit - 1000
@@ -140,8 +145,8 @@ def chat_with_ai(
# Append user input, the length of this is accounted for above
message_sequence.append(user_input_msg)

plugin_count = len(cfg.plugins)
for i, plugin in enumerate(cfg.plugins):
plugin_count = len(config.plugins)
for i, plugin in enumerate(config.plugins):
if not plugin.can_handle_on_planning():
continue
plugin_response = plugin.on_planning(
Expand All @@ -157,7 +162,6 @@ def chat_with_ai(
logger.debug(f"Plugins remaining at stop: {plugin_count - i}")
break
message_sequence.add("system", plugin_response)

# Calculate remaining tokens
tokens_remaining = token_limit - current_tokens_used
# assert tokens_remaining >= 0, "Tokens remaining is negative.
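
The new signature replaces the module-level `cfg = Config()` with an explicit `config` argument and makes the model optional via the usual "None means the configured default" idiom. Reduced to a runnable sketch (`FakeConfig` and `resolve_model` are illustrative names, not the real API):

```
from __future__ import annotations

from dataclasses import dataclass


@dataclass
class FakeConfig:
    fast_llm_model: str = "gpt-3.5-turbo"


def resolve_model(config: FakeConfig, model: str | None = None) -> str:
    # None means "use the configured default", as chat_with_ai now does.
    if model is None:
        model = config.fast_llm_model
    return model


assert resolve_model(FakeConfig()) == "gpt-3.5-turbo"
assert resolve_model(FakeConfig(), model="gpt-4") == "gpt-4"
```
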
39 changes: 21 additions & 18 deletions autogpt/main.py
@@ -22,6 +22,21 @@
from autogpt.workspace import Workspace
from scripts.install_plugin_deps import install_plugin_dependencies

COMMAND_CATEGORIES = [
"autogpt.commands.analyze_code",
"autogpt.commands.audio_text",
"autogpt.commands.execute_code",
"autogpt.commands.file_operations",
"autogpt.commands.git_operations",
"autogpt.commands.google_search",
"autogpt.commands.image_gen",
"autogpt.commands.improve_code",
"autogpt.commands.web_selenium",
"autogpt.commands.write_tests",
"autogpt.app",
"autogpt.commands.task_statuses",
]


def run_auto_gpt(
continuous: bool,
@@ -128,30 +143,18 @@ def run_auto_gpt(
# Create a CommandRegistry instance and scan default folder
command_registry = CommandRegistry()

command_categories = [
"autogpt.commands.analyze_code",
"autogpt.commands.audio_text",
"autogpt.commands.execute_code",
"autogpt.commands.file_operations",
"autogpt.commands.git_operations",
"autogpt.commands.google_search",
"autogpt.commands.image_gen",
"autogpt.commands.improve_code",
"autogpt.commands.web_selenium",
"autogpt.commands.write_tests",
"autogpt.app",
"autogpt.commands.task_statuses",
]
logger.debug(
f"The following command categories are disabled: {cfg.disabled_command_categories}"
)
command_categories = [
x for x in command_categories if x not in cfg.disabled_command_categories
enabled_command_categories = [
x for x in COMMAND_CATEGORIES if x not in cfg.disabled_command_categories
]

logger.debug(f"The following command categories are enabled: {command_categories}")
logger.debug(
f"The following command categories are enabled: {enabled_command_categories}"
)

for command_category in command_categories:
for command_category in enabled_command_categories:
command_registry.import_commands(command_category)

ai_name = ""
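
With the list hoisted to a module-level `COMMAND_CATEGORIES` constant, the enable/disable step is a plain list comprehension over it. In isolation (abridged list, illustrative disabled value):

```
COMMAND_CATEGORIES = [
    "autogpt.commands.analyze_code",
    "autogpt.commands.execute_code",
    "autogpt.commands.file_operations",
    # ... full list as in autogpt/main.py
]

# Illustrative; at runtime this comes from cfg.disabled_command_categories.
disabled_command_categories = ["autogpt.commands.execute_code"]

enabled_command_categories = [
    x for x in COMMAND_CATEGORIES if x not in disabled_command_categories
]

assert enabled_command_categories == [
    "autogpt.commands.analyze_code",
    "autogpt.commands.file_operations",
]
```
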
9 changes: 6 additions & 3 deletions docs/challenges/information_retrieval/challenge_a.md
@@ -1,16 +1,19 @@
# Information Retrieval Challenge A

**Status**: Current level to beat: level 1
**Status**: Current level to beat: level 2

**Command to try**:

```
pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py
pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py --level=2
```

## Description

The agent's goal is to find the revenue of Tesla in 2022.
The agent's goal is to find the revenue of Tesla:
- level 1 asks for the revenue of Tesla in 2022 and explicitly asks to search for 'tesla revenue 2022'
- level 2 is identical but doesn't ask to search for 'tesla revenue 2022'
- level 3 asks for Tesla's revenue by year since its creation.

It should write the result in a file called output.txt.

22 changes: 22 additions & 0 deletions docs/challenges/information_retrieval/challenge_b.md
@@ -0,0 +1,22 @@
# Information Retrieval Challenge B

**Status**: Beaten

**Command to try**:

```
pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py
```

## Description

The agent's goal is to find the names, affiliated universities, and discoveries of the individuals who won the Nobel Prize in Physics in 2010.

It should write the result in a file called 2010_nobel_prize_winners.txt.

The agent should be able to beat this test consistently (this is the hardest part).

## Objective

The objective of this challenge is to test the agent's ability to retrieve multiple pieces of related information in a consistent way.
The agent should not use Google to perform the task, because it should already know the answer. This is why the task fails after 2 cycles (1 cycle to retrieve the information, 1 cycle to write the file).
15 changes: 10 additions & 5 deletions docs/setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ Get your OpenAI API key from: [https://platform.openai.com/account/api-keys](htt
### Set up with Docker

1. Make sure you have Docker installed, see [requirements](#requirements)
2. Pull the latest image from [Docker Hub]
2. Create a project directory for Auto-GPT

:::shell
docker pull significantgravitas/auto-gpt
mkdir Auto-GPT
cd Auto-GPT

3. Create a folder for Auto-GPT
4. In the folder, create a file called `docker-compose.yml` with the following contents:
3. In the project directory, create a file called `docker-compose.yml` with the following contents:

:::yaml
version: "3.9"
@@ -71,8 +71,13 @@ Get your OpenAI API key from: [https://platform.openai.com/account/api-keys](https://platform.openai.com/account/api-keys)
redis:
image: "redis/redis-stack-server:latest"

5. Create the necessary [configuration](#configuration) files. If needed, you can find
4. Create the necessary [configuration](#configuration) files. If needed, you can find
templates in the [repository].
5. Pull the latest image from [Docker Hub]

:::shell
docker pull significantgravitas/auto-gpt

6. Continue to [Run with Docker](#run-with-docker)

!!! note "Docker only supports headless browsing"
1 change: 1 addition & 0 deletions mkdocs.yml
@@ -28,6 +28,7 @@ nav:
- Information retrieval:
- Introduction: challenges/information_retrieval/introduction.md
- Information Retrieval Challenge A: challenges/information_retrieval/challenge_a.md
- Information Retrieval Challenge B: challenges/information_retrieval/challenge_b.md
- Submit a Challenge: challenges/submit.md
- Beat a Challenge: challenges/beat.md
