From 0b8f3b8e82b752a3ed17cca8611276d46eb5586f Mon Sep 17 00:00:00 2001 From: Kilian Lieret Date: Tue, 15 Oct 2024 22:53:47 -0400 Subject: [PATCH] Extend tests and add github actions --- .github/workflows/pytest.yaml | 51 +++++++++++++++++ pyproject.toml | 9 ++- src/swebridge/local.py | 3 - src/swebridge/models.py | 2 +- src/swebridge/runtime.py | 16 ++++-- tests/conftest.py | 2 - tests/test_dress_rehearsal.py | 102 ++++++++++++++++++++++++++++++++++ 7 files changed, 172 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/pytest.yaml diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml new file mode 100644 index 0000000..6bbd88a --- /dev/null +++ b/.github/workflows/pytest.yaml @@ -0,0 +1,51 @@ + +name: Pytest + +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + +on: + push: + branches: + - main + paths-ignore: + - 'docs/**' + - 'README.md' + - 'mkdocs.yml' + pull_request: + branches: + - main + paths-ignore: + - 'docs/**' + - 'README.md' + - 'mkdocs.yml' + +jobs: + test: + runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} + steps: + - name: Checkout code + uses: actions/checkout@v2 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Install uv + run: | + curl -LsSf https://astral.sh/uv/install.sh | sh + - name: Install dependencies + run: | + uv pip install --python ${Python_ROOT_DIR} '.[dev]' + - name: Run pytest + uses: sjvrijn/pytest-last-failed@v2 + with: + pytest-args: '--cov' + # - name: Explicitly convert coverage to xml + # run: coverage xml + # - name: Upload coverage reports to Codecov + # uses: codecov/codecov-action@v4.0.1 + # with: + # token: ${{ secrets.CODECOV_TOKEN }} + # slug: princeton-nlp/SWE-agent diff --git a/pyproject.toml b/pyproject.toml index c542b7a..9c1a5f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,13 @@ classifiers = [ "Programming Language :: Python :: 3 :: Only", ] +dependencies = [ + "fastapi", + "uvicorn", + "requests", + "pydantic", +] + [project.optional-dependencies] dev = [ # "mkdocs-material", @@ -212,4 +219,4 @@ ACI = "ACI" [tool.typos.default.extend-words] # Don't correct the surname "Teh" -aci = "aci" \ No newline at end of file +aci = "aci" diff --git a/src/swebridge/local.py b/src/swebridge/local.py index 98f9a72..501b8cf 100755 --- a/src/swebridge/local.py +++ b/src/swebridge/local.py @@ -111,9 +111,6 @@ def write_file(self, request: WriteFileRequest) -> WriteFileResponse: if __name__ == "__main__": runtime = RemoteRuntime("localhost:8000") - print(runtime.read_file(ReadFileRequest(path="README.md"))) - print(runtime.write_file(WriteFileRequest(path="_test.txt", content="test"))) - print(runtime.read_file(ReadFileRequest(path="_test.txt"))) # ---- # print(runtime.execute(Command(command="ls", shell=True))) # ---- diff --git a/src/swebridge/models.py b/src/swebridge/models.py index d9307cf..aaa5967 100644 --- a/src/swebridge/models.py +++ b/src/swebridge/models.py @@ -2,7 +2,7 @@ class CreateShellRequest(BaseModel): - name: str = "default" + session: str = "default" class CreateShellResponse(BaseModel): diff --git a/src/swebridge/runtime.py b/src/swebridge/runtime.py index 7225f8c..08578c2 100644 --- a/src/swebridge/runtime.py +++ b/src/swebridge/runtime.py @@ -105,20 +105,22 @@ def __init__(self): self.sessions: dict[str, Session] = {} async def create_shell(self, request: CreateShellRequest) -> CreateShellResponse: - if request.name in self.sessions: - return CreateShellResponse(success=False, failure_reason="session already exists") + if request.session in self.sessions: + return CreateShellResponse(success=False, failure_reason=f"session {request.session} already exists") shell = Session() - self.sessions[request.name] = shell + self.sessions[request.session] = shell return await shell.start() async def run_in_shell(self, action: Action) -> Observation: if action.session not in self.sessions: - return Observation(output="", exit_code_raw="-312", failure_reason="session does not exist") + return Observation( + output="", exit_code_raw="-312", failure_reason=f"session {action.session!r} does not exist" + ) return await self.sessions[action.session].run(action) async def close_shell(self, request: CloseRequest) -> CloseResponse: if request.session not in self.sessions: - return CloseResponse(success=False, failure_reason="session does not exist") + return CloseResponse(success=False, failure_reason=f"session {request.session!r} does not exist") out = await self.sessions[request.session].close() del self.sessions[request.session] return out @@ -132,7 +134,9 @@ async def execute(self, command: Command) -> CommandResponse: exit_code=result.returncode, ) except subprocess.TimeoutExpired: - return CommandResponse(stdout="", stderr="", exit_code=-1) + return CommandResponse( + stdout="", stderr=f"Timeout ({command.timeout}s) exceeded while running command", exit_code=-1 + ) except Exception as e: return CommandResponse(stdout="", stderr=str(e), exit_code=-2) diff --git a/tests/conftest.py b/tests/conftest.py index 570202c..8d547b7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -35,8 +35,6 @@ def run_server(): time.sleep(0.1) return RemoteServer(port) - # The thread will be automatically terminated when the test session ends - # because it's a daemon thread @pytest.fixture diff --git a/tests/test_dress_rehearsal.py b/tests/test_dress_rehearsal.py index b182584..d7a8b49 100644 --- a/tests/test_dress_rehearsal.py +++ b/tests/test_dress_rehearsal.py @@ -1,5 +1,107 @@ +from pathlib import Path + from swebridge.local import RemoteRuntime +from swebridge.models import Action, CloseRequest, Command, CreateShellRequest, ReadFileRequest, WriteFileRequest def test_server_alive(remote_runtime: RemoteRuntime): assert remote_runtime.is_alive() + + +def test_server_dead(): + r = RemoteRuntime("http://doesnotexistadsfasdfasdf234123qw34.com") + assert not r.is_alive() + + +def test_read_write_file(remote_runtime: RemoteRuntime, tmp_path: Path): + path = tmp_path / "test.txt" + remote_runtime.write_file(WriteFileRequest(path=str(path), content="test")) + assert path.read_text() == "test" + assert remote_runtime.read_file(ReadFileRequest(path=str(path))).content == "test" + + +def test_read_non_existent_file(remote_runtime: RemoteRuntime): + assert not remote_runtime.read_file(ReadFileRequest(path="non_existent.txt")).success + + +def test_execute_command(remote_runtime: RemoteRuntime): + assert remote_runtime.execute(Command(command="echo 'hello world'", shell=True)).stdout == "hello world\n" + + +def test_execute_command_shell_false(remote_runtime: RemoteRuntime): + assert remote_runtime.execute(Command(command=["echo", "hello world"], shell=False)).stdout == "hello world\n" + + +def test_execute_command_timeout(remote_runtime: RemoteRuntime): + r = remote_runtime.execute(Command(command=["sleep", "10"], timeout=0.1)) + assert not r.success + assert "timeout" in r.stderr.lower() + assert not r.stdout + + +def test_create_close_shell(remote_runtime: RemoteRuntime): + r = remote_runtime.create_shell(CreateShellRequest()) + assert r.success + r = remote_runtime.close_shell(CloseRequest()) + assert r.success + + +def test_run_in_shell(remote_runtime: RemoteRuntime): + name = "test_run_in_shell" + r = remote_runtime.create_shell(CreateShellRequest(session=name)) + assert r.success + r = remote_runtime.run_in_shell(Action(command="echo 'hello world'", session=name)) + assert r.success + r = remote_runtime.run_in_shell(Action(command="doesntexit", session=name)) + assert not r.success + r = remote_runtime.close_shell(CloseRequest(session=name)) + assert r.success + + +def test_run_in_shell_non_existent_session(remote_runtime: RemoteRuntime): + r = remote_runtime.run_in_shell(Action(command="echo 'hello world'", session="non_existent")) + assert not r.success + assert "does not exist" in r.failure_reason + + +def test_close_shell_non_existent_session(remote_runtime: RemoteRuntime): + r = remote_runtime.close_shell(CloseRequest(session="non_existent")) + assert not r.success + assert "does not exist" in r.failure_reason + + +def test_close_shell_twice(remote_runtime: RemoteRuntime): + r = remote_runtime.create_shell(CreateShellRequest()) + assert r.success + r = remote_runtime.close_shell(CloseRequest()) + assert r.success + r = remote_runtime.close_shell(CloseRequest()) + assert not r.success + assert "does not exist" in r.failure_reason + + +def test_run_in_shell_timeout(remote_runtime: RemoteRuntime): + print("in test") + r = remote_runtime.create_shell(CreateShellRequest()) + assert r.success + r = remote_runtime.run_in_shell(Action(command="sleep 10", timeout=0.1)) + assert not r.success + assert "timeout" in r.failure_reason + assert not r.output + r = remote_runtime.close_shell(CloseRequest()) + assert r.success + + +def test_run_in_shell_interactive_command(remote_runtime: RemoteRuntime): + r = remote_runtime.create_shell(CreateShellRequest()) + assert r.success + r = remote_runtime.run_in_shell(Action(command="python", is_interactive_command=True, expect=[">>> "])) + assert r.success + r = remote_runtime.run_in_shell( + Action(command="print('hello world')", is_interactive_command=True, expect=[">>> "]) + ) + assert r.success + r = remote_runtime.run_in_shell(Action(command="quit()\n", is_interactive_quit=True)) + assert r.success + r = remote_runtime.close_shell(CloseRequest()) + assert r.success