Implement docker based command line code executor (microsoft#1856)

jackgerrits · ekzhu · web-flow · commit 4503d964e655 · 2024-03-07T18:11:52.000Z
* implement docker based command line code executor

* undo import

* test skips

* format

* fix type issue

* skip docker tests

* fix paths

* add docs

* Update __init__.py

* class name

* precommit

* undo twoagent change

* use relative to directly

* Update, fixes, etc.

* update doc

* Update docstring

---------

Co-authored-by: Eric Zhu <ekzhu@users.noreply.github.com>
diff --git a/autogen/coding/__init__.py b/autogen/coding/__init__.py
@@ -1,6 +1,8 @@
 from .base import CodeBlock, CodeExecutor, CodeExtractor, CodeResult
 from .factory import CodeExecutorFactory
 from .markdown_code_extractor import MarkdownCodeExtractor
+from .local_commandline_code_executor import LocalCommandLineCodeExecutor, CommandLineCodeResult
+from .docker_commandline_code_executor import DockerCommandLineCodeExecutor
 
 __all__ = (
     "CodeBlock",
@@ -9,4 +11,7 @@
     "CodeExecutor",
     "CodeExecutorFactory",
     "MarkdownCodeExtractor",
+    "LocalCommandLineCodeExecutor",
+    "CommandLineCodeResult",
+    "DockerCommandLineCodeExecutor",
 )
diff --git a/autogen/coding/docker_commandline_code_executor.py b/autogen/coding/docker_commandline_code_executor.py
@@ -0,0 +1,231 @@
+from __future__ import annotations
+import atexit
+from hashlib import md5
+import logging
+from pathlib import Path
+from time import sleep
+from types import TracebackType
+import uuid
+from typing import List, Optional, Type, Union
+import docker
+from docker.models.containers import Container
+from docker.errors import ImageNotFound
+
+from .local_commandline_code_executor import CommandLineCodeResult
+
+from ..code_utils import TIMEOUT_MSG, _cmd
+from .base import CodeBlock, CodeExecutor, CodeExtractor
+from .markdown_code_extractor import MarkdownCodeExtractor
+import sys
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+
+
+def _wait_for_ready(container: Container, timeout: int = 60, stop_time: int = 0.1) -> None:
+    elapsed_time = 0
+    while container.status != "running" and elapsed_time < timeout:
+        sleep(stop_time)
+        elapsed_time += stop_time
+        container.reload()
+        continue
+    if container.status != "running":
+        raise ValueError("Container failed to start")
+
+
+__all__ = ("DockerCommandLineCodeExecutor",)
+
+
+class DockerCommandLineCodeExecutor(CodeExecutor):
+    def __init__(
+        self,
+        image: str = "python:3-slim",
+        container_name: Optional[str] = None,
+        timeout: int = 60,
+        work_dir: Union[Path, str] = Path("."),
+        auto_remove: bool = True,
+        stop_container: bool = True,
+    ):
+        """(Experimental) A code executor class that executes code through
+        a command line environment in a Docker container.
+
+        The executor first saves each code block in a file in the working
+        directory, and then executes the code file in the container.
+        The executor executes the code blocks in the order they are received.
+        Currently, the executor only supports Python and shell scripts.
+        For Python code, use the language "python" for the code block.
+        For shell scripts, use the language "bash", "shell", or "sh" for the code
+        block.
+
+        Args:
+            image (_type_, optional): Docker image to use for code execution.
+                Defaults to "python:3-slim".
+            container_name (Optional[str], optional): Name of the Docker container
+                which is created. If None, will autogenerate a name. Defaults to None.
+            timeout (int, optional): The timeout for code execution. Defaults to 60.
+            work_dir (Union[Path, str], optional): The working directory for the code
+                execution. Defaults to Path(".").
+            auto_remove (bool, optional): If true, will automatically remove the Docker
+                container when it is stopped. Defaults to True.
+            stop_container (bool, optional): If true, will automatically stop the
+                container when stop is called, when the context manager exits or when
+                the Python process exits with atext. Defaults to True.
+
+        Raises:
+            ValueError: On argument error, or if the container fails to start.
+        """
+
+        if timeout < 1:
+            raise ValueError("Timeout must be greater than or equal to 1.")
+
+        if isinstance(work_dir, str):
+            work_dir = Path(work_dir)
+
+        if not work_dir.exists():
+            raise ValueError(f"Working directory {work_dir} does not exist.")
+
+        client = docker.from_env()
+
+        # Check if the image exists
+        try:
+            client.images.get(image)
+        except ImageNotFound:
+            logging.info(f"Pulling image {image}...")
+            # Let the docker exception escape if this fails.
+            client.images.pull(image)
+
+        if container_name is None:
+            container_name = f"autogen-code-exec-{uuid.uuid4()}"
+
+        # Start a container from the image, read to exec commands later
+        self._container = client.containers.create(
+            image,
+            name=container_name,
+            entrypoint="/bin/sh",
+            tty=True,
+            auto_remove=auto_remove,
+            volumes={str(work_dir.resolve()): {"bind": "/workspace", "mode": "rw"}},
+            working_dir="/workspace",
+        )
+        self._container.start()
+
+        _wait_for_ready(self._container)
+
+        def cleanup():
+            try:
+                container = client.containers.get(container_name)
+                container.stop()
+            except docker.errors.NotFound:
+                pass
+
+            atexit.unregister(cleanup)
+
+        if stop_container:
+            atexit.register(cleanup)
+
+        self._cleanup = cleanup
+
+        # Check if the container is running
+        if self._container.status != "running":
+            raise ValueError(f"Failed to start container from image {image}. Logs: {self._container.logs()}")
+
+        self._timeout = timeout
+        self._work_dir: Path = work_dir
+
+    @property
+    def timeout(self) -> int:
+        """(Experimental) The timeout for code execution."""
+        return self._timeout
+
+    @property
+    def work_dir(self) -> Path:
+        """(Experimental) The working directory for the code execution."""
+        return self._work_dir
+
+    @property
+    def code_extractor(self) -> CodeExtractor:
+        """(Experimental) Export a code extractor that can be used by an agent."""
+        return MarkdownCodeExtractor()
+
+    def execute_code_blocks(self, code_blocks: List[CodeBlock]) -> CommandLineCodeResult:
+        """(Experimental) Execute the code blocks and return the result.
+
+        Args:
+            code_blocks (List[CodeBlock]): The code blocks to execute.
+
+        Returns:
+            CommandlineCodeResult: The result of the code execution."""
+
+        if len(code_blocks) == 0:
+            raise ValueError("No code blocks to execute.")
+
+        outputs = []
+        files = []
+        last_exit_code = 0
+        for code_block in code_blocks:
+            lang = code_block.language
+            code = code_block.code
+
+            code_hash = md5(code.encode()).hexdigest()
+
+            # Check if there is a filename comment
+            # Get first line
+            first_line = code.split("\n")[0]
+            if first_line.startswith("# filename:"):
+                filename = first_line.split(":")[1].strip()
+
+                # Handle relative paths in the filename
+                path = Path(filename)
+                if not path.is_absolute():
+                    path = Path("/workspace") / path
+                path = path.resolve()
+                try:
+                    path.relative_to(Path("/workspace"))
+                except ValueError:
+                    return CommandLineCodeResult(exit_code=1, output="Filename is not in the workspace")
+            else:
+                # create a file with a automatically generated name
+                filename = f"tmp_code_{code_hash}.{'py' if lang.startswith('python') else lang}"
+
+            code_path = self._work_dir / filename
+            with code_path.open("w", encoding="utf-8") as fout:
+                fout.write(code)
+
+            command = ["timeout", str(self._timeout), _cmd(lang), filename]
+
+            result = self._container.exec_run(command)
+            exit_code = result.exit_code
+            output = result.output.decode("utf-8")
+            if exit_code == 124:
+                output += "\n"
+                output += TIMEOUT_MSG
+
+            outputs.append(output)
+            files.append(code_path)
+
+            last_exit_code = exit_code
+            if exit_code != 0:
+                break
+
+        code_file = str(files[0]) if files else None
+        return CommandLineCodeResult(exit_code=last_exit_code, output="".join(outputs), code_file=code_file)
+
+    def restart(self) -> None:
+        """(Experimental) Restart the code executor."""
+        self._container.restart()
+        if self._container.status != "running":
+            raise ValueError(f"Failed to restart container. Logs: {self._container.logs()}")
+
+    def stop(self) -> None:
+        """(Experimental) Stop the code executor."""
+        self._cleanup()
+
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+    ) -> None:
+        self.stop()
diff --git a/autogen/coding/jupyter/docker_jupyter_server.py b/autogen/coding/jupyter/docker_jupyter_server.py
@@ -2,16 +2,17 @@
 
 from pathlib import Path
 import sys
-from time import sleep
 from types import TracebackType
 import uuid
-from typing import Dict, Optional, Union
+from typing import Dict, Optional, Type, Union
 import docker
 import secrets
 import io
 import atexit
 import logging
 
+from ..docker_commandline_code_executor import _wait_for_ready
+
 if sys.version_info >= (3, 11):
     from typing import Self
 else:
@@ -22,17 +23,6 @@
 from .base import JupyterConnectable, JupyterConnectionInfo
 
 
-def _wait_for_ready(container: docker.Container, timeout: int = 60, stop_time: int = 0.1) -> None:
-    elapsed_time = 0
-    while container.status != "running" and elapsed_time < timeout:
-        sleep(stop_time)
-        elapsed_time += stop_time
-        container.reload()
-        continue
-    if container.status != "running":
-        raise ValueError("Container failed to start")
-
-
 class DockerJupyterServer(JupyterConnectable):
     DEFAULT_DOCKERFILE = """FROM quay.io/jupyter/docker-stacks-foundation
 
@@ -162,6 +152,6 @@ def __enter__(self) -> Self:
         return self
 
     def __exit__(
-        self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
     ) -> None:
         self.stop()
diff --git a/autogen/coding/jupyter/jupyter_client.py b/autogen/coding/jupyter/jupyter_client.py
@@ -2,7 +2,7 @@
 
 from dataclasses import dataclass
 from types import TracebackType
-from typing import Any, Dict, List, Optional, cast
+from typing import Any, Dict, List, Optional, Type, cast
 import sys
 
 if sys.version_info >= (3, 11):
@@ -111,7 +111,7 @@ def __enter__(self) -> Self:
         return self
 
     def __exit__(
-        self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
     ) -> None:
         self.stop()
 
diff --git a/autogen/coding/jupyter/jupyter_code_executor.py b/autogen/coding/jupyter/jupyter_code_executor.py
@@ -5,7 +5,7 @@
 import re
 from types import TracebackType
 import uuid
-from typing import Any, ClassVar, List, Optional, Union
+from typing import Any, ClassVar, List, Optional, Type, Union
 import sys
 
 if sys.version_info >= (3, 11):
@@ -201,6 +201,6 @@ def __enter__(self) -> Self:
         return self
 
     def __exit__(
-        self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
     ) -> None:
         self.stop()
diff --git a/autogen/coding/jupyter/local_jupyter_server.py b/autogen/coding/jupyter/local_jupyter_server.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 from types import TracebackType
 
-from typing import Optional, Union, cast
+from typing import Optional, Type, Union, cast
 import subprocess
 import signal
 import sys
@@ -157,6 +157,6 @@ def __enter__(self) -> Self:
         return self
 
     def __exit__(
-        self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
     ) -> None:
         self.stop()
diff --git a/test/coding/test_commandline_code_executor.py b/test/coding/test_commandline_code_executor.py
diff --git a/test/coding/test_embedded_ipython_code_executor.py b/test/coding/test_embedded_ipython_code_executor.py
diff --git a/website/docs/topics/code-execution/cli-code-executor.ipynb b/website/docs/topics/code-execution/cli-code-executor.ipynb