diff --git a/.env.template b/.env.template
index dd690308dd0e..d4d99baa2cfe 100644
--- a/.env.template
+++ b/.env.template
@@ -71,6 +71,7 @@ OPENAI_API_KEY=your-openai-api-key
 # TEMPERATURE=0
 # USE_AZURE=False
+# OPENAI_ORGANIZATION=your-openai-organization-key-if-applicable
 
 ### AZURE
 # moved to `azure.yaml.template`
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 368930a15f3f..5219b9826bd6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -5,6 +5,7 @@ on:
     branches: [ master, ci-test*]
     paths-ignore:
       - 'tests/Auto-GPT-test-cassettes'
+      - 'tests/integration/challenges/current_score.json'
   pull_request_target:
     branches: [ master, stable , ci-test*]
@@ -119,6 +120,7 @@ jobs:
       - name: Run pytest tests with coverage
         run: |
           pytest -n auto --cov=autogpt --cov-report term-missing --cov-branch --cov-report xml --cov-report term
+          python tests/integration/challenges/utils/build_current_score.py
         env:
           CI: true
           PROXY: ${{ secrets.PROXY }}
@@ -131,11 +133,20 @@ jobs:
       - name: Update cassette submodule to push target if push event
         if: ${{ github.event_name == 'push' }}
         run: |
-          cd tests/Auto-GPT-test-cassettes
           current_branch=$(echo ${{ github.ref }} | sed -e "s/refs\/heads\///g")
-          git fetch origin $current_branch
           git config --global user.name "Auto-GPT-Bot"
           git config --global user.email "github-bot@agpt.co"
+          git add tests/integration/challenges/current_score.json
+
+          if ! git diff-index --quiet HEAD; then
+            git commit -m "Update current score"
+            git push origin HEAD:refs/heads/$current_branch
+          else
+            echo "The current score didn't change."
+          fi
+
+          cd tests/Auto-GPT-test-cassettes
+          git fetch origin $current_branch
           git add .
 
           # Check if there are any changes
@@ -150,7 +161,7 @@ jobs:
             git commit -m "Update submodule reference"
             git push origin HEAD:refs/heads/$current_branch
           else
-            echo "No changes to commit"
+            echo "No cassette changes to commit"
             exit 0
           fi
@@ -182,7 +193,7 @@ jobs:
             echo "DIFF_EXISTS=false" >> $GITHUB_ENV
           fi
 
-      - name: Apply or remove prompt change label and comment
+      - name: Apply or remove behaviour change label and comment
         if: ${{ github.event_name == 'pull_request_target' }}
         run: |
           PR_NUMBER=${{ github.event.pull_request.number }}
@@ -195,14 +206,14 @@ jobs:
             -H "Authorization: Bearer $TOKEN" \
             -H "Accept: application/vnd.github.v3+json" \
             https://api.github.com/repos/$REPO/issues/$PR_NUMBER/labels \
-            -d '{"labels":["prompt change"]}'
+            -d '{"labels":["behaviour change"]}'
 
           echo $TOKEN | gh auth login --with-token
-          gh api repos/$REPO/issues/$PR_NUMBER/comments -X POST -F body="You changed AutoGPT's prompt. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged."
+          gh api repos/$REPO/issues/$PR_NUMBER/comments -X POST -F body="You changed AutoGPT's behaviour. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged."
         else
           echo "Removing label..."
          curl -X DELETE \
            -H "Authorization: Bearer $TOKEN" \
            -H "Accept: application/vnd.github.v3+json" \
-            https://api.github.com/repos/$REPO/issues/$PR_NUMBER/labels/prompt%20change
+            https://api.github.com/repos/$REPO/issues/$PR_NUMBER/labels/behaviour%20change
          fi
diff --git a/.github/workflows/docker-ci.yml b/.github/workflows/docker-ci.yml
index 2cc3296b0499..a61b707d8e7c 100644
--- a/.github/workflows/docker-ci.yml
+++ b/.github/workflows/docker-ci.yml
@@ -3,6 +3,9 @@ name: Docker CI
 on:
   push:
     branches: [ master ]
+    paths-ignore:
+      - 'tests/Auto-GPT-test-cassettes'
+      - 'tests/integration/challenges/current_score.json'
   pull_request:
     branches: [ master, stable ]
diff --git a/.github/workflows/pr-label.yml b/.github/workflows/pr-label.yml
index ff4174ad81a0..0bab56385b3d 100644
--- a/.github/workflows/pr-label.yml
+++ b/.github/workflows/pr-label.yml
@@ -4,6 +4,9 @@ on:
   # So that PRs touching the same files as the push are updated
   push:
     branches: [ master ]
+    paths-ignore:
+      - 'tests/Auto-GPT-test-cassettes'
+      - 'tests/integration/challenges/current_score.json'
   # So that the `dirtyLabel` is removed if conflicts are resolve
   # We recommend `pull_request_target` so that github secrets are available.
   # In `pull_request` we wouldn't be able to change labels of fork PRs
diff --git a/BULLETIN.md b/BULLETIN.md
index 17c38b8c983c..70be3c3e3709 100644
--- a/BULLETIN.md
+++ b/BULLETIN.md
@@ -51,3 +51,9 @@ memory store was also temporarily removed but we aim to merge a new implementati
 before the next release.
 Whether built-in support for the others will be added back in the future is subject to
 discussion, feel free to pitch in: https://github.com/Significant-Gravitas/Auto-GPT/discussions/4280
+
+# Challenge Workflow 🏆
+If you have been working on challenges... Thank You!
+To run the debugger challenge or other challenges that use cassettes and VCR in Docker, you will now need to `pip uninstall vcrpy` and `pip install -r requirements.txt` again.
+This will install a version of vcrpy that is compatible with running VCR in Docker.
+This workflow will be fixed as soon as the VCRpy maintainer merges our changes.
diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py
index a1673ad9b2d1..3dc4d390092b 100644
--- a/autogpt/agent/agent.py
+++ b/autogpt/agent/agent.py
@@ -128,11 +128,13 @@ def signal_handler(signum, frame):
             # Send message to AI, get response
             with Spinner("Thinking... ", plain_output=cfg.plain_output):
                 assistant_reply = chat_with_ai(
+                    cfg,
                     self,
                     self.system_prompt,
                     self.triggering_prompt,
                     cfg.fast_token_limit,
-                )  # TODO: This hardcodes the model to use GPT3.5. Make this an argument
+                    cfg.fast_llm_model,
+                )
 
             assistant_reply_json = fix_json_using_multiple_techniques(assistant_reply)
             for plugin in cfg.plugins:
diff --git a/autogpt/config/config.py b/autogpt/config/config.py
index 1e61b808561c..5f76bb745506 100644
--- a/autogpt/config/config.py
+++ b/autogpt/config/config.py
@@ -64,6 +64,7 @@ def __init__(self) -> None:
         )
 
         self.openai_api_key = os.getenv("OPENAI_API_KEY")
+        self.openai_organization = os.getenv("OPENAI_ORGANIZATION")
         self.temperature = float(os.getenv("TEMPERATURE", "0"))
         self.use_azure = os.getenv("USE_AZURE") == "True"
         self.execute_local_commands = (
@@ -79,6 +80,9 @@ def __init__(self) -> None:
             openai.api_base = self.openai_api_base
             openai.api_version = self.openai_api_version
 
+        if self.openai_organization is not None:
+            openai.organization = self.openai_organization
+
         self.elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
         self.elevenlabs_voice_1_id = os.getenv("ELEVENLABS_VOICE_1_ID")
         self.elevenlabs_voice_2_id = os.getenv("ELEVENLABS_VOICE_2_ID")
diff --git a/autogpt/configurator.py b/autogpt/configurator.py
index 6b855fe3243a..324f30843ea1 100644
--- a/autogpt/configurator.py
+++ b/autogpt/configurator.py
@@ -14,6 +14,9 @@
 if TYPE_CHECKING:
     from autogpt.config import Config
 
+GPT_4_MODEL = "gpt-4"
+GPT_3_MODEL = "gpt-3.5-turbo"
+
 
 def create_config(
     config: Config,
@@ -51,8 +54,6 @@ def create_config(
     config.set_debug_mode(False)
     config.set_continuous_mode(False)
     config.set_speak_mode(False)
-    config.set_fast_llm_model(check_model(config.fast_llm_model, "fast_llm_model"))
-    config.set_smart_llm_model(check_model(config.smart_llm_model, "smart_llm_model"))
 
     if debug:
         logger.typewriter_log("Debug Mode: ", Fore.GREEN, "ENABLED")
@@ -83,13 +84,26 @@ def create_config(
         logger.typewriter_log("Speak Mode: ", Fore.GREEN, "ENABLED")
         config.set_speak_mode(True)
 
+    # Set the default LLM models
     if gpt3only:
         logger.typewriter_log("GPT3.5 Only Mode: ", Fore.GREEN, "ENABLED")
-        config.set_smart_llm_model(config.fast_llm_model)
-
-    if gpt4only:
+        # --gpt3only should always use gpt-3.5-turbo, despite user's FAST_LLM_MODEL config
+        config.set_fast_llm_model(GPT_3_MODEL)
+        config.set_smart_llm_model(GPT_3_MODEL)
+
+    elif (
+        gpt4only
+        and check_model(GPT_4_MODEL, model_type="smart_llm_model") == GPT_4_MODEL
+    ):
         logger.typewriter_log("GPT4 Only Mode: ", Fore.GREEN, "ENABLED")
-        config.set_fast_llm_model(config.smart_llm_model)
+        # --gpt4only should always use gpt-4, despite user's SMART_LLM_MODEL config
+        config.set_fast_llm_model(GPT_4_MODEL)
+        config.set_smart_llm_model(GPT_4_MODEL)
+    else:
+        config.set_fast_llm_model(check_model(config.fast_llm_model, "fast_llm_model"))
+        config.set_smart_llm_model(
+            check_model(config.smart_llm_model, "smart_llm_model")
+        )
 
     if memory_type:
         supported_memory = get_supported_memory_backends()
diff --git a/autogpt/llm/chat.py b/autogpt/llm/chat.py
index 292990f5f044..7cb598256b72 100644
--- a/autogpt/llm/chat.py
+++ b/autogpt/llm/chat.py
@@ -13,29 +13,34 @@
 from autogpt.log_cycle.log_cycle import CURRENT_CONTEXT_FILE_NAME
 from autogpt.logs import logger
 
-cfg = Config()
-
 
 # TODO: Change debug from hardcode to argument
 def chat_with_ai(
+    config: Config,
     agent: Agent,
     system_prompt: str,
     user_input: str,
     token_limit: int,
+    model: str | None = None,
 ):
     """
     Interact with the OpenAI API, sending the prompt, user input,
         message history, and permanent memory.
 
     Args:
+        config (Config): The config to use.
+        agent (Agent): The agent to use.
         system_prompt (str): The prompt explaining the rules to the AI.
         user_input (str): The input from the user.
         token_limit (int): The maximum number of tokens allowed in the API call.
+        model (str, optional): The model to use. If None, the config.fast_llm_model will be used. Defaults to None.
 
     Returns:
         str: The AI's response.
     """
-    model = cfg.fast_llm_model  # TODO: Change model from hardcode to argument
+    if model is None:
+        model = config.fast_llm_model
+
     # Reserve 1000 tokens for the response
     logger.debug(f"Token limit: {token_limit}")
     send_token_limit = token_limit - 1000
@@ -140,8 +145,8 @@ def chat_with_ai(
     # Append user input, the length of this is accounted for above
     message_sequence.append(user_input_msg)
 
-    plugin_count = len(cfg.plugins)
-    for i, plugin in enumerate(cfg.plugins):
+    plugin_count = len(config.plugins)
+    for i, plugin in enumerate(config.plugins):
         if not plugin.can_handle_on_planning():
             continue
         plugin_response = plugin.on_planning(
@@ -157,7 +162,6 @@ def chat_with_ai(
             logger.debug(f"Plugins remaining at stop: {plugin_count - i}")
             break
         message_sequence.add("system", plugin_response)
-
     # Calculate remaining tokens
     tokens_remaining = token_limit - current_tokens_used
     # assert tokens_remaining >= 0, "Tokens remaining is negative.
diff --git a/autogpt/main.py b/autogpt/main.py
index 39bbf8b5ad9c..efc70aae27ff 100644
--- a/autogpt/main.py
+++ b/autogpt/main.py
@@ -22,6 +22,21 @@
 from autogpt.workspace import Workspace
 from scripts.install_plugin_deps import install_plugin_dependencies
 
+COMMAND_CATEGORIES = [
+    "autogpt.commands.analyze_code",
+    "autogpt.commands.audio_text",
+    "autogpt.commands.execute_code",
+    "autogpt.commands.file_operations",
+    "autogpt.commands.git_operations",
+    "autogpt.commands.google_search",
+    "autogpt.commands.image_gen",
+    "autogpt.commands.improve_code",
+    "autogpt.commands.web_selenium",
+    "autogpt.commands.write_tests",
+    "autogpt.app",
+    "autogpt.commands.task_statuses",
+]
+
 
 def run_auto_gpt(
     continuous: bool,
@@ -128,30 +143,18 @@ def run_auto_gpt(
 
     # Create a CommandRegistry instance and scan default folder
     command_registry = CommandRegistry()
-    command_categories = [
-        "autogpt.commands.analyze_code",
-        "autogpt.commands.audio_text",
-        "autogpt.commands.execute_code",
-        "autogpt.commands.file_operations",
-        "autogpt.commands.git_operations",
-        "autogpt.commands.google_search",
-        "autogpt.commands.image_gen",
-        "autogpt.commands.improve_code",
-        "autogpt.commands.web_selenium",
-        "autogpt.commands.write_tests",
-        "autogpt.app",
-        "autogpt.commands.task_statuses",
-    ]
     logger.debug(
         f"The following command categories are disabled: {cfg.disabled_command_categories}"
     )
-    command_categories = [
-        x for x in command_categories if x not in cfg.disabled_command_categories
+    enabled_command_categories = [
+        x for x in COMMAND_CATEGORIES if x not in cfg.disabled_command_categories
     ]
-    logger.debug(f"The following command categories are enabled: {command_categories}")
+    logger.debug(
+        f"The following command categories are enabled: {enabled_command_categories}"
+    )
 
-    for command_category in command_categories:
+    for command_category in enabled_command_categories:
         command_registry.import_commands(command_category)
 
     ai_name = ""
diff --git a/docs/challenges/information_retrieval/challenge_a.md b/docs/challenges/information_retrieval/challenge_a.md
index 51762fc421cd..de21066ea550 100644
--- a/docs/challenges/information_retrieval/challenge_a.md
+++ b/docs/challenges/information_retrieval/challenge_a.md
@@ -1,16 +1,19 @@
 # Information Retrieval Challenge A
 
-**Status**: Current level to beat: level 1
+**Status**: Current level to beat: level 2
 
 **Command to try**:
 
 ```
-pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py
+pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py --level=2
 ```
 
 ## Description
 
-The agent's goal is to find the revenue of Tesla in 2022.
+The agent's goal is to find the revenue of Tesla:
+- level 1 asks for the revenue of Tesla in 2022 and explicitly asks the agent to search for 'tesla revenue 2022'
+- level 2 is identical but doesn't ask to search for 'tesla revenue 2022'
+- level 3 asks for Tesla's revenue by year since its creation. It should write the result in a file called output.txt.
diff --git a/docs/challenges/information_retrieval/challenge_b.md b/docs/challenges/information_retrieval/challenge_b.md
new file mode 100644
index 000000000000..bf77a984f646
--- /dev/null
+++ b/docs/challenges/information_retrieval/challenge_b.md
@@ -0,0 +1,22 @@
+# Information Retrieval Challenge B
+
+**Status**: Beaten
+
+**Command to try**:
+
+```
+pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py
+```
+
+## Description
+
+The agent's goal is to find the names, affiliated university, and discovery of the individuals who won the Nobel Prize in Physics in 2010.
+
+It should write the result in a file called 2010_nobel_prize_winners.txt.
+
+The agent should be able to beat this test consistently (this is the hardest part).
+
+## Objective
+
+The objective of this challenge is to test the agent's ability to retrieve multiple pieces of related information in a consistent way.
+The agent should not use Google to perform the task, because it should already know the answer. This is why the task fails after 2 cycles (1 cycle to retrieve the information, 1 cycle to write the file).
diff --git a/docs/setup.md b/docs/setup.md
index 4bdf6a16b2e6..c4755a8d68db 100644
--- a/docs/setup.md
+++ b/docs/setup.md
@@ -34,13 +34,13 @@ Get your OpenAI API key from: [https://platform.openai.com/account/api-keys](htt
 ### Set up with Docker
 
 1. Make sure you have Docker installed, see [requirements](#requirements)
-2. Pull the latest image from [Docker Hub]
+2. Create a project directory for Auto-GPT
 
     :::shell
-        docker pull significantgravitas/auto-gpt
+        mkdir Auto-GPT
+        cd Auto-GPT
 
-3. Create a folder for Auto-GPT
-4. In the folder, create a file called `docker-compose.yml` with the following contents:
+3. In the project directory, create a file called `docker-compose.yml` with the following contents:
 
     :::yaml
     version: "3.9"
@@ -71,8 +71,13 @@ Get your OpenAI API key from: [https://platform.openai.com/account/api-keys](htt
     redis:
       image: "redis/redis-stack-server:latest"
 
-5. Create the necessary [configuration](#configuration) files. If needed, you can find
+4. Create the necessary [configuration](#configuration) files. If needed, you can find
     templates in the [repository].
+5. Pull the latest image from [Docker Hub]
+
+    :::shell
+        docker pull significantgravitas/auto-gpt
+
 6. Continue to [Run with Docker](#run-with-docker)
 
 !!! note "Docker only supports headless browsing"
 
     ai_config = AIConfig(
-        ai_name="Information Retrieval Agent",
-        ai_role="an autonomous agent that specializes in retrieving information.",
+        ai_name="Kubernetes",
+        ai_role="an autonomous agent that specializes in creating Kubernetes deployment templates.",
         ai_goals=[
-            "Search for 'tesla revenue 2022' and write the revenue of Tesla in 2022 to a file called output.txt. You should write the number without commas and you should not use signs like B for billion and M for million.",
+            "Write a simple kubernetes deployment file and save it as a kube.yaml.",
+            # You should make a simple nginx web server that uses docker and exposes the port 80.
         ],
     )
     ai_config.command_registry = command_registry
+    system_prompt = ai_config.construct_full_prompt()
     Config().set_continuous_mode(False)
     agent = Agent(
-        ai_name="Get-CompanyRevenue",
+        ai_name="Kubernetes-Demo",
         memory=memory_json_file,
         command_registry=command_registry,
         config=ai_config,
@@ -169,29 +211,69 @@ def get_company_revenue_agent(
         next_action_count=0,
         system_prompt=system_prompt,
         triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
         workspace_directory=workspace.root,
     )
+    return agent
 
 
 @pytest.fixture
-def kubernetes_agent(memory_json_file, workspace: Workspace):
+def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Workspace):
     command_registry = CommandRegistry()
     command_registry.import_commands("autogpt.commands.file_operations")
     command_registry.import_commands("autogpt.app")
+    command_registry.import_commands("autogpt.commands.web_selenium")
 
     ai_config = AIConfig(
-        ai_name="Kubernetes",
-        ai_role="an autonomous agent that specializes in creating Kubernetes deployment templates.",
+        ai_name="Get-PhysicsNobelPrize",
+        ai_role="An autonomous agent that specializes in physics history.",
         ai_goals=[
-            "Write a simple kubernetes deployment file and save it as a kube.yaml.",
-            # You should make a simple nginx web server that uses docker and exposes the port 80.
+            "Write to file the winner's name(s), affiliated university, and discovery of the 2010 nobel prize in physics. Write your final answer to 2010_nobel_prize_winners.txt.",
         ],
     )
     ai_config.command_registry = command_registry
     system_prompt = ai_config.construct_full_prompt()
     Config().set_continuous_mode(False)
+
     agent = Agent(
-        ai_name="Kubernetes-Demo",
+        ai_name="Get-PhysicsNobelPrize",
+        memory=memory_json_file,
+        command_registry=command_registry,
+        config=ai_config,
+        next_action_count=0,
+        system_prompt=system_prompt,
+        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
+        workspace_directory=workspace.root,
+    )
+
+    return agent
+
+
+@pytest.fixture
+def debug_code_agent(agent_test_config, memory_json_file, workspace: Workspace):
+    command_registry = CommandRegistry()
+    command_registry.import_commands("autogpt.commands.file_operations")
+    command_registry.import_commands("autogpt.commands.execute_code")
+    command_registry.import_commands("autogpt.commands.improve_code")
+    command_registry.import_commands("autogpt.app")
+    command_registry.import_commands("autogpt.commands.task_statuses")
+
+    ai_config = AIConfig(
+        ai_name="Debug Code Agent",
+        ai_role="an autonomous agent that specializes in debugging python code",
+        ai_goals=[
+            "1-Run the code in the file named 'code.py' using the execute_code command.",
+            "2-Read code.py to understand why the code is not working as expected.",
+            "3-Modify code.py to fix the error.",
+            "Repeat step 1, 2 and 3 until the code is working as expected. When you're done use the task_complete command.",
+            "Do not use any other commands than execute_python_file and write_file",
+        ],
+    )
+    ai_config.command_registry = command_registry
+
+    system_prompt = ai_config.construct_full_prompt()
+    Config().set_continuous_mode(False)
+    agent = Agent(
+        ai_name="Debug Code Agent",
         memory=memory_json_file,
         command_registry=command_registry,
         config=ai_config,
diff --git a/tests/integration/challenges/challenge_decorator/challenge.py b/tests/integration/challenges/challenge_decorator/challenge.py
index baf821a1dd37..fd3b60cb6cb1 100644
--- a/tests/integration/challenges/challenge_decorator/challenge.py
+++ b/tests/integration/challenges/challenge_decorator/challenge.py
@@ -9,6 +9,7 @@ def __init__(
         name: str,
         category: str,
         max_level: int,
+        is_new_challenge: bool,
         max_level_beaten: Optional[int],
         level_to_run: Optional[int] = None,
     ) -> None:
@@ -19,3 +20,4 @@ def __init__(
         self.succeeded = False
        self.skipped = False
         self.level_to_run = level_to_run
+        self.is_new_challenge = is_new_challenge
diff --git a/tests/integration/challenges/challenge_decorator/challenge_decorator.py b/tests/integration/challenges/challenge_decorator/challenge_decorator.py
index 580dc0890697..fe12317eed8b 100644
--- a/tests/integration/challenges/challenge_decorator/challenge_decorator.py
+++ b/tests/integration/challenges/challenge_decorator/challenge_decorator.py
@@ -1,4 +1,3 @@
-import contextlib
 import os
 from functools import wraps
 from typing import Any, Callable, Optional
@@ -23,6 +22,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
     @wraps(func)
     def wrapper(*args: Any, **kwargs: Any) -> None:
         run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1
+        original_error = None
 
         while run_remaining > 0:
             current_score, new_score, new_score_location = get_scores()
@@ -32,9 +32,12 @@ def wrapper(*args: Any, **kwargs: Any) -> None:
                 )
                 if challenge.level_to_run is not None:
                     kwargs["level_to_run"] = challenge.level_to_run
-                    with contextlib.suppress(AssertionError):
+                    try:
                         func(*args, **kwargs)
                         challenge.succeeded = True
+                    except AssertionError as err:
+                        original_error = err
+                        challenge.succeeded = False
                 else:
                     challenge.skipped = True
                 if os.environ.get("CI") == "true":
@@ -48,9 +51,11 @@ def wrapper(*args: Any, **kwargs: Any) -> None:
                     pytest.skip("This test has not been unlocked yet.")
 
                 if not challenge.succeeded:
-                    if Challenge.BEAT_CHALLENGES:
+                    if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge:
                         # xfail
                         pytest.xfail("Challenge failed")
+                    if original_error:
+                        raise original_error
                     raise AssertionError("Challenge failed")
 
                 run_remaining -= 1
diff --git a/tests/integration/challenges/challenge_decorator/challenge_utils.py b/tests/integration/challenges/challenge_decorator/challenge_utils.py
index b94f71649038..7db7648fa4bc 100644
--- a/tests/integration/challenges/challenge_decorator/challenge_utils.py
+++ b/tests/integration/challenges/challenge_decorator/challenge_utils.py
@@ -13,13 +13,13 @@ def create_challenge(
     level_to_run: Optional[int] = None,
 ) -> Challenge:
     challenge_category, challenge_name = get_challenge_identifiers(func)
-
+    is_new_challenge = challenge_name not in current_score.get(challenge_category, {})
     max_level = get_max_level(current_score, challenge_category, challenge_name)
     max_level_beaten = get_max_level_beaten(
         current_score, challenge_category, challenge_name
     )
     level_to_run = get_level_to_run(
-        is_beat_challenges, level_to_run, max_level, max_level_beaten
+        is_beat_challenges, level_to_run, max_level, max_level_beaten, is_new_challenge
     )
 
     return Challenge(
@@ -28,6 +28,7 @@ def create_challenge(
         max_level=max_level,
         max_level_beaten=max_level_beaten,
         level_to_run=level_to_run,
+        is_new_challenge=is_new_challenge,
     )
 
 
@@ -36,7 +37,10 @@ def get_level_to_run(
     level_to_run: Optional[int],
     max_level: int,
     max_level_beaten: Optional[int],
+    is_new_challenge: bool,
 ) -> Optional[int]:
+    if is_new_challenge:
+        return 1
     if level_to_run is not None:
         if level_to_run > max_level:
             raise ValueError(
diff --git a/tests/integration/challenges/conftest.py b/tests/integration/challenges/conftest.py
index 8a1b5c406bfd..5514a1293fba 100644
--- a/tests/integration/challenges/conftest.py
+++ b/tests/integration/challenges/conftest.py
@@ -1,9 +1,31 @@
+from typing import Any, Dict, Optional
+
 import pytest
 from _pytest.config import Config
 from _pytest.config.argparsing import Parser
 from _pytest.fixtures import FixtureRequest
 
 from tests.integration.challenges.challenge_decorator.challenge import Challenge
+from tests.integration.conftest import BASE_VCR_CONFIG
+from tests.vcr.vcr_filter import before_record_response
+
+
+def before_record_response_filter_errors(
+    response: Dict[str, Any]
+) -> Optional[Dict[str, Any]]:
+    """In challenges we don't want to record errors (See issue #4461)"""
+    if response["status"]["code"] >= 400:
+        return None
+
+    return before_record_response(response)
+
+
+@pytest.fixture(scope="module")
+def vcr_config() -> Dict[str, Any]:
+    # this fixture is called by the pytest-recording vcr decorator.
+    return BASE_VCR_CONFIG | {
+        "before_record_response": before_record_response_filter_errors,
+    }
 
 
 def pytest_addoption(parser: Parser) -> None:
diff --git a/tests/integration/challenges/current_score.json b/tests/integration/challenges/current_score.json
index a734ff5d674d..726613991d48 100644
--- a/tests/integration/challenges/current_score.json
+++ b/tests/integration/challenges/current_score.json
@@ -9,8 +9,18 @@
       "max_level_beaten": 1
     }
   },
+  "debug_code": {
+    "debug_code_challenge_a": {
+      "max_level": 1,
+      "max_level_beaten": 1
+    }
+  },
   "information_retrieval": {
     "information_retrieval_challenge_a": {
+      "max_level": 3,
+      "max_level_beaten": 1
+    },
+    "information_retrieval_challenge_b": {
       "max_level": 1,
       "max_level_beaten": 1
     }
@@ -28,7 +38,7 @@
     },
     "memory_challenge_b": {
       "max_level": 5,
-      "max_level_beaten": 1
+      "max_level_beaten": null
     },
     "memory_challenge_c": {
       "max_level": 5,
diff --git a/tests/integration/challenges/debug_code/data/two_sum.py b/tests/integration/challenges/debug_code/data/two_sum.py
new file mode 100644
index 000000000000..305cff4e41d0
--- /dev/null
+++ b/tests/integration/challenges/debug_code/data/two_sum.py
@@ -0,0 +1,19 @@
+# mypy: ignore-errors
+from typing import List, Optional
+
+
+def two_sum(nums: List, target: int) -> Optional[int]:
+    seen = {}
+    for i, num in enumerate(nums):
+        complement = target - num
+        if complement in seen:
+            return [seen[complement], i]
+        seen[num] = i
+    return None
+
+
+# Example usage:
+nums = [2, 7, 11, 15]
+target = 9
+result = two_sum(nums, target)
+print(result)  # Output: [0, 1]
diff --git a/tests/integration/challenges/debug_code/data/two_sum_tests.py b/tests/integration/challenges/debug_code/data/two_sum_tests.py
new file mode 100644
index 000000000000..0eb89bcbfc95
--- /dev/null
+++ b/tests/integration/challenges/debug_code/data/two_sum_tests.py
@@ -0,0 +1,30 @@
+# mypy: ignore-errors
+# we need a new line at the top of the file to avoid a syntax error
+
+
+def test_two_sum(nums, target, expected_result):
+    # These tests are appended to the two_sum file so we can ignore this error for now
+    result = two_sum(nums, target)
+    print(result)
+    assert (
+        result == expected_result
+    ), f"AssertionError: Expected the output to be {expected_result}"
+
+
+# test the trivial case with the first two numbers
+nums = [2, 7, 11, 15]
+target = 9
+expected_result = [0, 1]
+test_two_sum(nums, target, expected_result)
+
+# test for ability to use zero and the same number twice
+nums = [2, 7, 0, 15, 12, 0]
+target = 0
+expected_result = [2, 5]
+test_two_sum(nums, target, expected_result)
+
+# test for first and last index usage and negative numbers
+nums = [-6, 7, 11, 4]
+target = -2
+expected_result = [0, 3]
+test_two_sum(nums, target, expected_result)
diff --git a/tests/integration/challenges/debug_code/test_debug_code_challenge_a.py b/tests/integration/challenges/debug_code/test_debug_code_challenge_a.py
new file mode 100644
index 000000000000..008e562ce307
--- /dev/null
+++ b/tests/integration/challenges/debug_code/test_debug_code_challenge_a.py
@@ -0,0 +1,51 @@
+from pathlib import Path
+
+import pytest
+from pytest_mock import MockerFixture
+
+from autogpt.agent import Agent
+from autogpt.commands.execute_code import execute_python_file
+from autogpt.commands.file_operations import append_to_file, write_to_file
+from autogpt.config import Config
+from tests.integration.challenges.challenge_decorator.challenge_decorator import (
+    challenge,
+)
+from tests.integration.challenges.utils import run_interaction_loop
+from tests.utils import requires_api_key
+
+CYCLE_COUNT = 5
+
+
+@pytest.mark.vcr
+@requires_api_key("OPENAI_API_KEY")
+@challenge
+def test_debug_code_challenge_a(
+    debug_code_agent: Agent,
+    monkeypatch: pytest.MonkeyPatch,
+    patched_api_requestor: MockerFixture,
+    config: Config,
+    level_to_run: int,
+) -> None:
+    """
+    Test whether the agent can debug a simple code snippet.
+
+    :param debug_code_agent: The agent to test.
+    :param monkeypatch: pytest's monkeypatch utility for modifying builtins.
+    :param patched_api_requestor: Sends API requests to our API CI pipeline.
+    :param config: The config object for the agent.
+    :param level_to_run: The level to run.
+    """
+
+    file_path = str(debug_code_agent.workspace.get_path("code.py"))
+
+    code_file_path = Path(__file__).parent / "data" / "two_sum.py"
+    test_file_path = Path(__file__).parent / "data" / "two_sum_tests.py"
+
+    write_to_file(file_path, code_file_path.read_text(), config)
+
+    run_interaction_loop(monkeypatch, debug_code_agent, CYCLE_COUNT)
+
+    append_to_file(file_path, test_file_path.read_text(), config)
+
+    output = execute_python_file(file_path, config)
+    assert "error" not in output.lower(), f"Errors found in output: {output}!"
diff --git a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py b/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py
index 7a9de8ab3dd6..6b970e8b227d 100644
--- a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py
+++ b/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py
@@ -9,6 +9,7 @@
 from tests.utils import requires_api_key
 
 CYCLE_COUNT = 3
+EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]
 
 from autogpt.agent import Agent
 
@@ -16,7 +17,7 @@
 @requires_api_key("OPENAI_API_KEY")
 @challenge
 def test_information_retrieval_challenge_a(
-    get_company_revenue_agent: Agent,
+    information_retrieval_agents: Agent,
     monkeypatch: pytest.MonkeyPatch,
     patched_api_requestor: None,
     config: Config,
@@ -28,8 +29,13 @@ def test_information_retrieval_challenge_a(
     :param get_company_revenue_agent: The agent to test.
     :param monkeypatch: pytest's monkeypatch utility for modifying builtins.
     """
-    run_interaction_loop(monkeypatch, get_company_revenue_agent, CYCLE_COUNT)
+    information_retrieval_agent = information_retrieval_agents[level_to_run - 1]
+    run_interaction_loop(monkeypatch, information_retrieval_agent, CYCLE_COUNT)
 
-    file_path = str(get_company_revenue_agent.workspace.get_path("output.txt"))
+    file_path = str(information_retrieval_agent.workspace.get_path("output.txt"))
     content = read_file(file_path, config)
-    assert "81" in content, "Expected the file to contain 81"
+    expected_revenues = EXPECTED_REVENUES[level_to_run - 1]
+    for revenue in expected_revenues:
+        assert (
+            f"{revenue}." in content or f"{revenue}," in content
+        ), f"Expected the file to contain {revenue}"
diff --git a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py b/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py
new file mode 100644
index 000000000000..feac95a0f646
--- /dev/null
+++ b/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py
@@ -0,0 +1,51 @@
+import contextlib
+
+import pytest
+
+from autogpt.agent import Agent
+from autogpt.commands.file_operations import read_file
+from autogpt.config import Config
+from tests.integration.challenges.challenge_decorator.challenge_decorator import (
+    challenge,
+)
+from tests.integration.challenges.utils import run_interaction_loop
+from tests.utils import requires_api_key
+
+CYCLE_COUNT = 3
+
+
+@pytest.mark.vcr
+@requires_api_key("OPENAI_API_KEY")
+@challenge
+def test_information_retrieval_challenge_b(
+    get_nobel_prize_agent: Agent,
+    monkeypatch: pytest.MonkeyPatch,
+    patched_api_requestor: None,
+    level_to_run: int,
+    config: Config,
+) -> None:
+    """
+    Test the challenge_b function in a given agent by mocking user inputs and checking the output file content.
+
+    :param get_nobel_prize_agent: The agent to test.
+    :param monkeypatch: pytest's monkeypatch utility for modifying builtins.
+    :param patched_api_requestor: APIRequestor Patch to override the openai.api_requestor module for testing.
+    :param level_to_run: The level to run.
+    :param config: The config object.
+ """ + + with contextlib.suppress(SystemExit): + run_interaction_loop(monkeypatch, get_nobel_prize_agent, CYCLE_COUNT) + + file_path = str( + get_nobel_prize_agent.workspace.get_path("2010_nobel_prize_winners.txt") + ) + content = read_file(file_path, config) + assert "Andre Geim" in content, "Expected the file to contain Andre Geim" + assert ( + "Konstantin Novoselov" in content + ), "Expected the file to contain Konstantin Novoselov" + assert ( + "University of Manchester" in content + ), "Expected the file to contain University of Manchester" + assert "graphene" in content, "Expected the file to contain graphene" diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index cb49bc13a626..686f50be405f 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -7,21 +7,23 @@ from tests.conftest import PROXY from tests.vcr.vcr_filter import before_record_request, before_record_response +BASE_VCR_CONFIG = { + "record_mode": "new_episodes", + "before_record_request": before_record_request, + "before_record_response": before_record_response, + "filter_headers": [ + "Authorization", + "X-OpenAI-Client-User-Agent", + "User-Agent", + ], + "match_on": ["method", "body"], +} + @pytest.fixture(scope="session") def vcr_config(): # this fixture is called by the pytest-recording vcr decorator. - return { - "record_mode": "new_episodes", - "before_record_request": before_record_request, - "before_record_response": before_record_response, - "filter_headers": [ - "Authorization", - "X-OpenAI-Client-User-Agent", - "User-Agent", - ], - "match_on": ["method", "body"], - } + return BASE_VCR_CONFIG def patch_api_base(requestor): diff --git a/tests/test_image_gen.py b/tests/integration/test_image_gen.py similarity index 97% rename from tests/test_image_gen.py rename to tests/integration/test_image_gen.py index 5c04921b5a52..0156c9e5bdc9 100644 --- a/tests/test_image_gen.py +++ b/tests/integration/test_image_gen.py @@ -16,11 +16,9 @@ def image_size(request): return request.param -@pytest.mark.xfail( - reason="The image is too big to be put in a cassette for a CI pipeline. We're looking into a solution." -) @requires_api_key("OPENAI_API_KEY") -def test_dalle(config, workspace, image_size): +@pytest.mark.vcr +def test_dalle(config, workspace, image_size, patched_api_requestor): """Test DALL-E image generation.""" generate_and_validate( config, diff --git a/tests/test_config.py b/tests/test_config.py index 81d151cd242e..eb6946c91d22 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -2,14 +2,17 @@ Test cases for the Config class, which handles the configuration settings for the AI and ensures it behaves as a singleton. """ +from unittest import mock from unittest.mock import patch import pytest -from autogpt.configurator import create_config +from autogpt.config.config import Config +from autogpt.configurator import GPT_3_MODEL, GPT_4_MODEL, create_config +from autogpt.workspace.workspace import Workspace -def test_initial_values(config): +def test_initial_values(config: Config): """ Test if the initial values of the Config class attributes are set correctly. """ @@ -22,7 +25,7 @@ def test_initial_values(config): assert config.smart_token_limit == 8000 -def test_set_continuous_mode(config): +def test_set_continuous_mode(config: Config): """ Test if the set_continuous_mode() method updates the continuous_mode attribute. 
""" @@ -36,7 +39,7 @@ def test_set_continuous_mode(config): config.set_continuous_mode(continuous_mode) -def test_set_speak_mode(config): +def test_set_speak_mode(config: Config): """ Test if the set_speak_mode() method updates the speak_mode attribute. """ @@ -50,7 +53,7 @@ def test_set_speak_mode(config): config.set_speak_mode(speak_mode) -def test_set_fast_llm_model(config): +def test_set_fast_llm_model(config: Config): """ Test if the set_fast_llm_model() method updates the fast_llm_model attribute. """ @@ -64,7 +67,7 @@ def test_set_fast_llm_model(config): config.set_fast_llm_model(fast_llm_model) -def test_set_smart_llm_model(config): +def test_set_smart_llm_model(config: Config): """ Test if the set_smart_llm_model() method updates the smart_llm_model attribute. """ @@ -78,7 +81,7 @@ def test_set_smart_llm_model(config): config.set_smart_llm_model(smart_llm_model) -def test_set_fast_token_limit(config): +def test_set_fast_token_limit(config: Config): """ Test if the set_fast_token_limit() method updates the fast_token_limit attribute. """ @@ -92,7 +95,7 @@ def test_set_fast_token_limit(config): config.set_fast_token_limit(fast_token_limit) -def test_set_smart_token_limit(config): +def test_set_smart_token_limit(config: Config): """ Test if the set_smart_token_limit() method updates the smart_token_limit attribute. """ @@ -106,7 +109,7 @@ def test_set_smart_token_limit(config): config.set_smart_token_limit(smart_token_limit) -def test_set_debug_mode(config): +def test_set_debug_mode(config: Config): """ Test if the set_debug_mode() method updates the debug_mode attribute. """ @@ -121,7 +124,7 @@ def test_set_debug_mode(config): @patch("openai.Model.list") -def test_smart_and_fast_llm_models_set_to_gpt4(mock_list_models, config): +def test_smart_and_fast_llm_models_set_to_gpt4(mock_list_models, config: Config): """ Test if models update to gpt-3.5-turbo if both are set to gpt-4. 
""" @@ -158,7 +161,7 @@ def test_smart_and_fast_llm_models_set_to_gpt4(mock_list_models, config): config.set_smart_llm_model(smart_llm_model) -def test_missing_azure_config(config, workspace): +def test_missing_azure_config(config: Config, workspace: Workspace): config_file = workspace.get_path("azure_config.yaml") with pytest.raises(FileNotFoundError): config.load_azure_config(str(config_file)) @@ -170,3 +173,61 @@ def test_missing_azure_config(config, workspace): assert config.openai_api_base == "" assert config.openai_api_version == "2023-03-15-preview" assert config.azure_model_to_deployment_id_map == {} + + +def test_create_config_gpt4only(config: Config) -> None: + fast_llm_model = config.fast_llm_model + smart_llm_model = config.smart_llm_model + with mock.patch("autogpt.llm.api_manager.ApiManager.get_models") as mock_get_models: + mock_get_models.return_value = [{"id": GPT_4_MODEL}] + create_config( + config=config, + continuous=False, + continuous_limit=None, + ai_settings_file=None, + prompt_settings_file=None, + skip_reprompt=False, + speak=False, + debug=False, + gpt3only=False, + gpt4only=True, + memory_type=None, + browser_name=None, + allow_downloads=False, + skip_news=False, + ) + assert config.fast_llm_model == GPT_4_MODEL + assert config.smart_llm_model == GPT_4_MODEL + + # Reset config + config.set_fast_llm_model(fast_llm_model) + config.set_smart_llm_model(smart_llm_model) + + +def test_create_config_gpt3only(config: Config) -> None: + fast_llm_model = config.fast_llm_model + smart_llm_model = config.smart_llm_model + with mock.patch("autogpt.llm.api_manager.ApiManager.get_models") as mock_get_models: + mock_get_models.return_value = [{"id": GPT_3_MODEL}] + create_config( + config=config, + continuous=False, + continuous_limit=None, + ai_settings_file=None, + prompt_settings_file=None, + skip_reprompt=False, + speak=False, + debug=False, + gpt3only=True, + gpt4only=False, + memory_type=None, + browser_name=None, + allow_downloads=False, + skip_news=False, + ) + assert config.fast_llm_model == GPT_3_MODEL + assert config.smart_llm_model == GPT_3_MODEL + + # Reset config + config.set_fast_llm_model(fast_llm_model) + config.set_smart_llm_model(smart_llm_model)