Create "Debug Code Challenge" #4286

Merged
6 changes: 6 additions & 0 deletions BULLETIN.md
@@ -51,3 +51,9 @@ memory store was also temporarily removed but we aim to merge a new implementation
before the next release.
Whether built-in support for the others will be added back in the future is subject to
discussion; feel free to pitch in: https://github.com/Significant-Gravitas/Auto-GPT/discussions/4280

# Challenge Workflow 🏆
If you have been working on challenges... thank you!
To run the debug challenge, or any other challenge that uses cassettes and VCR in Docker, you will now need to `pip uninstall vcrpy` and then `pip install -r requirements.txt` again.
This installs a patched version of vcrpy that is compatible with running VCR in Docker.
This workflow will be fixed as soon as the VCRpy maintainer merges our changes.
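
For reference, the full reinstall sequence described above (a sketch, run from the repository root inside your usual virtualenv):

```shell
# Remove the PyPI build of vcrpy
pip uninstall vcrpy
# Reinstall dependencies; requirements.txt now pins the patched fork
pip install -r requirements.txt
```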
2 changes: 1 addition & 1 deletion requirements.txt
@@ -58,6 +58,6 @@ pytest-benchmark
pytest-cov
pytest-integration
pytest-mock
vcrpy
vcrpy @ git+https://github.com/Significant-Gravitas/vcrpy.git@master
pytest-recording
pytest-xdist
38 changes: 38 additions & 0 deletions tests/integration/agent_factory.py
@@ -246,3 +246,41 @@ def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Worksp
    )

    return agent


@pytest.fixture
def debug_code_agent(agent_test_config, memory_json_file, workspace: Workspace):
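    # Register only the command modules this challenge needs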
    command_registry = CommandRegistry()
    command_registry.import_commands("autogpt.commands.file_operations")
    command_registry.import_commands("autogpt.commands.execute_code")
    command_registry.import_commands("autogpt.commands.improve_code")
    command_registry.import_commands("autogpt.app")
    command_registry.import_commands("autogpt.commands.task_statuses")

    ai_config = AIConfig(
        ai_name="Debug Code Agent",
        ai_role="an autonomous agent that specializes in debugging Python code",
        ai_goals=[
            "1- Run the code in the file named 'code.py' using the execute_code command.",
            "2- Read code.py to understand why the code is not working as expected.",
            "3- Modify code.py to fix the error.",
            "Repeat steps 1, 2 and 3 until the code works as expected. When you're done, use the task_complete command.",
            "Do not use any commands other than execute_python_file and write_file.",
        ],
    )
    ai_config.command_registry = command_registry

    system_prompt = ai_config.construct_full_prompt()
    Config().set_continuous_mode(False)
    agent = Agent(
        ai_name="Debug Code Agent",
        memory=memory_json_file,
        command_registry=command_registry,
        config=ai_config,
        next_action_count=0,
        system_prompt=system_prompt,
        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
        workspace_directory=workspace.root,
    )

    return agent
8 changes: 7 additions & 1 deletion tests/integration/challenges/current_score.json
@@ -19,6 +19,12 @@
"max_level_beaten": 1
}
},
"debug_code": {
"debug_code_challenge_a": {
"max_level": 1,
"max_level_beaten": 1
}
},
"kubernetes": {
"kubernetes_template_challenge_a": {
"max_level": 1,
@@ -39,4 +45,4 @@
"max_level_beaten": 1
}
}
}
}
19 changes: 19 additions & 0 deletions tests/integration/challenges/debug_code/data/two_sum.py
@@ -0,0 +1,19 @@
# mypy: ignore-errors
from typing import List, Optional


def two_sum(nums: List[int], target: int) -> Optional[List[int]]:
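    # Map each value to its index so the complement can be found in O(1)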
    seen = {}
    for i, num in enumerate(nums):
        complement = target - num
        if complement in seen:
            return [seen[complement], i]
        seen[num] = i
    return None


# Example usage:
nums = [2, 7, 11, 15]
target = 9
result = two_sum(nums, target)
print(result) # Output: [0, 1]
30 changes: 30 additions & 0 deletions tests/integration/challenges/debug_code/data/two_sum_tests.py
@@ -0,0 +1,30 @@
# mypy: ignore-errors
# We need a newline at the top of this file to avoid a syntax error when it is appended to code.py


def test_two_sum(nums, target, expected_result):
    # These tests are appended to the two_sum file, so the undefined-name error can be ignored here
    result = two_sum(nums, target)
    print(result)
    assert (
        result == expected_result
    ), f"AssertionError: Expected the output to be {expected_result}"


# test the trivial case with the first two numbers
nums = [2, 7, 11, 15]
target = 9
expected_result = [0, 1]
test_two_sum(nums, target, expected_result)

# test for ability to use zero and the same number twice
nums = [2, 7, 0, 15, 12, 0]
target = 0
expected_result = [2, 5]
test_two_sum(nums, target, expected_result)

# test for first and last index usage and negative numbers
nums = [-6, 7, 11, 4]
target = -2
expected_result = [0, 3]
test_two_sum(nums, target, expected_result)
51 changes: 51 additions & 0 deletions tests/integration/challenges/debug_code/test_debug_code_challenge_a.py
@@ -0,0 +1,51 @@
from pathlib import Path

import pytest
from pytest_mock import MockerFixture

from autogpt.agent import Agent
from autogpt.commands.execute_code import execute_python_file
from autogpt.commands.file_operations import append_to_file, write_to_file
from autogpt.config import Config
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
    challenge,
)
from tests.integration.challenges.utils import run_interaction_loop
from tests.utils import requires_api_key

CYCLE_COUNT = 5


@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
def test_debug_code_challenge_a(
    debug_code_agent: Agent,
    monkeypatch: pytest.MonkeyPatch,
    patched_api_requestor: MockerFixture,
    config: Config,
    level_to_run: int,
) -> None:
"""
Test whether the agent can debug a simple code snippet.

:param debug_code_agent: The agent to test.
:param monkeypatch: pytest's monkeypatch utility for modifying builtins.
:patched_api_requestor: Sends api requests to our API CI pipeline
:config: The config object for the agent.
:level_to_run: The level to run.
"""

    file_path = str(debug_code_agent.workspace.get_path("code.py"))

    code_file_path = Path(__file__).parent / "data" / "two_sum.py"
    test_file_path = Path(__file__).parent / "data" / "two_sum_tests.py"

    # Copy the broken implementation into the agent's workspace as code.py
    write_to_file(file_path, code_file_path.read_text(), config)

    # Let the agent try to debug code.py for up to CYCLE_COUNT cycles
    run_interaction_loop(monkeypatch, debug_code_agent, CYCLE_COUNT)

    # Append the test cases to the (hopefully fixed) file
    append_to_file(file_path, test_file_path.read_text(), config)

    # Execute the combined file; any exception or failed assertion puts "error" in the output
    output = execute_python_file(file_path, config)
    assert "error" not in output.lower(), f"Errors found in output: {output}!"