diff --git a/CLAUDE.md b/CLAUDE.md index e12ca6cd94..df0d491b48 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -133,7 +133,7 @@ src/synthorg/ subscribers/ # Concrete settings subscribers (ProviderSettingsSubscriber — rebuilds ModelRouter on strategy change, MemorySettingsSubscriber — advisory logging for memory config) security/ # SecOps agent, rule engine (soft-allow/hard-deny, fail-closed), audit log, output scanner, output scan response policies (redact/withhold/log-only/autonomy-tiered), risk classifier, risk tier classifier, action type registry, ToolInvoker security integration, progressive trust (4 strategies: disabled/weighted/per-category/milestone), autonomy levels (presets, resolver, change strategy), timeout policies (park/resume) templates/ # Pre-built company templates, personality presets, and builder - tools/ # Tool registry, built-in tools (file_system/, git, sandbox/, code_runner), git clone SSRF prevention (git_url_validator), MCP bridge (mcp/), role-based access, approval tool (request_human_approval), tool factory (build_default_tools, build_default_tools_from_config) + tools/ # Tool registry, built-in tools (file_system/, git, sandbox/, code_runner), git clone SSRF prevention (git_url_validator), MCP bridge (mcp/), role-based access, approval tool (request_human_approval), tool factory (build_default_tools, build_default_tools_from_config), sandbox factory (sandbox/factory.py: build_sandbox_backends, resolve_sandbox_for_category, cleanup_sandbox_backends -- per-category backend selection from SandboxingConfig) web/ # Vue 3 + PrimeVue + Tailwind CSS dashboard src/ diff --git a/docs/architecture/tech-stack.md b/docs/architecture/tech-stack.md index b7c852a79f..1ce1f47bf2 100644 --- a/docs/architecture/tech-stack.md +++ b/docs/architecture/tech-stack.md @@ -111,7 +111,7 @@ These conventions are used throughout the codebase. For full details on each, se | **Parallel tool execution** | Adopted | `asyncio.TaskGroup` in `ToolInvoker.invoke_all` with optional `max_concurrency` semaphore and structured error collection. | | **Parallel agent execution** | Adopted | `ParallelExecutor` with `TaskGroup` + `Semaphore` concurrency limits, `ResourceLock` for exclusive file-path claims, progress tracking, and shutdown awareness. | | **Tool permission checking** | Adopted | Category-level gating based on `ToolAccessLevel`. Priority-based resolution: denied list, allowed list, level categories, then deny. | -| **Tool sandboxing** | Adopted | Layered: in-process path validation for file system tools, `SubprocessSandbox` for git tools, `DockerSandbox` planned for code execution. | +| **Tool sandboxing** | Adopted | Layered: in-process path validation for file system tools, `SubprocessSandbox` for git tools, `DockerSandbox` for code execution. Per-category backend selection via `SandboxingConfig` and sandbox factory. | | **Crash recovery** | Adopted | Pluggable `RecoveryStrategy` protocol. Current: `FailAndReassignStrategy`. Planned: `CheckpointStrategy` for per-turn state persistence. | | **Personality compatibility** | Adopted | Weighted composite scoring: 60% Big Five similarity, 20% collaboration alignment, 20% conflict approach. | | **Agent behavior testing** | Planned | Scripted `FakeProvider` for unit tests; behavioral outcome assertions for integration tests. | diff --git a/docs/design/operations.md b/docs/design/operations.md index 789ca5bf6b..a5212002ed 100644 --- a/docs/design/operations.md +++ b/docs/design/operations.md @@ -463,6 +463,13 @@ isolation for high-risk tools. network_policy: "deny-all" # default deny, allowlist per tool ``` +Per-category backend selection is implemented in `tools/sandbox/factory.py` via three functions: +`build_sandbox_backends` (instantiates only the backends referenced by config), +`resolve_sandbox_for_category` (looks up the correct backend for a `ToolCategory`), and +`cleanup_sandbox_backends` (parallel cleanup with error isolation). The tool factory +(`build_default_tools_from_config`) wires `VERSION_CONTROL` category; other categories will +be wired as their tool builders are added. + Docker is optional -- only required when code execution, terminal, web, or database tools are enabled. File system and git tools work out of the box with subprocess isolation. This keeps the local-first experience lightweight while providing strong isolation where it matters. diff --git a/src/synthorg/observability/events/sandbox.py b/src/synthorg/observability/events/sandbox.py index ea07fd49a6..03e1ffb519 100644 --- a/src/synthorg/observability/events/sandbox.py +++ b/src/synthorg/observability/events/sandbox.py @@ -14,3 +14,9 @@ SANDBOX_HEALTH_CHECK: Final[str] = "sandbox.health_check" SANDBOX_KILL_FAILED: Final[str] = "sandbox.kill.failed" SANDBOX_KILL_FALLBACK: Final[str] = "sandbox.kill.fallback" +SANDBOX_FACTORY_BUILT: Final[str] = "sandbox.factory.built" +SANDBOX_FACTORY_BUILD_FAILED: Final[str] = "sandbox.factory.built.failed" +SANDBOX_FACTORY_RESOLVE: Final[str] = "sandbox.factory.resolve" +SANDBOX_FACTORY_RESOLVE_FAILED: Final[str] = "sandbox.factory.resolve.failed" +SANDBOX_FACTORY_CLEANUP: Final[str] = "sandbox.factory.cleanup" +SANDBOX_FACTORY_CLEANUP_FAILED: Final[str] = "sandbox.factory.cleanup.failed" diff --git a/src/synthorg/tools/factory.py b/src/synthorg/tools/factory.py index 98bb1dcca6..0e35d020d7 100644 --- a/src/synthorg/tools/factory.py +++ b/src/synthorg/tools/factory.py @@ -1,4 +1,4 @@ -"""Tool factory — instantiate built-in workspace tools with config-driven parameters. +"""Tool factory -- instantiate built-in workspace tools with config-driven parameters. Provides ``build_default_tools`` (core factory) and ``build_default_tools_from_config`` (convenience wrapper that @@ -9,6 +9,7 @@ from typing import TYPE_CHECKING +from synthorg.core.enums import ToolCategory from synthorg.observability import get_logger from synthorg.observability.events.tool import ( TOOL_FACTORY_BUILT, @@ -30,8 +31,13 @@ GitLogTool, GitStatusTool, ) +from synthorg.tools.sandbox.factory import ( + build_sandbox_backends, + resolve_sandbox_for_category, +) if TYPE_CHECKING: + from collections.abc import Mapping from pathlib import Path from synthorg.config.schema import RootConfig @@ -127,35 +133,87 @@ def build_default_tools( return result +def _resolve_vc_sandbox( + *, + config: RootConfig, + sandbox_backends: Mapping[str, SandboxBackend] | None, + workspace: Path, +) -> SandboxBackend: + """Resolve the sandbox backend for the VERSION_CONTROL category. + + Builds backends from config when *sandbox_backends* is ``None``. + """ + if sandbox_backends is None: + sandbox_backends = build_sandbox_backends( + config=config.sandboxing, + workspace=workspace, + ) + return resolve_sandbox_for_category( + config=config.sandboxing, + backends=sandbox_backends, + category=ToolCategory.VERSION_CONTROL, + ) + + def build_default_tools_from_config( *, workspace: Path, config: RootConfig, sandbox: SandboxBackend | None = None, + sandbox_backends: Mapping[str, SandboxBackend] | None = None, ) -> tuple[BaseTool, ...]: """Build default tools using parameters from a ``RootConfig``. Convenience wrapper that extracts ``config.git_clone`` and - delegates to :func:`build_default_tools`. + ``config.sandboxing`` to resolve per-category sandbox backends. + + Currently wires the ``VERSION_CONTROL`` category (git tools). + Other categories (e.g. ``CODE_EXECUTION``) will be wired as + their respective tool builders are added to the factory. + + Sandbox resolution priority: + 1. Explicit *sandbox* -- backward-compat single backend for all tools. + 2. Explicit *sandbox_backends* -- per-category resolution via config. + 3. Neither -- auto-build backends from ``config.sandboxing``. Args: workspace: Absolute path to the agent workspace root. config: Validated root configuration. - sandbox: Optional sandbox backend for subprocess - isolation (passed to git tools). + sandbox: Optional single sandbox backend (overrides per-category + resolution when provided). + sandbox_backends: Pre-built mapping of backend name to instance. + When provided, per-category resolution uses this map + instead of auto-building backends. Returns: Sorted tuple of ``BaseTool`` instances. Raises: ValueError: If *workspace* is not an absolute path. + KeyError: If per-category sandbox resolution finds a backend + name not present in the built or provided backends mapping. """ logger.debug( TOOL_FACTORY_CONFIG_ENTRY, source="config", ) + + if sandbox is not None: + # Explicit single backend -- backward compat + return build_default_tools( + workspace=workspace, + git_clone_policy=config.git_clone, + sandbox=sandbox, + ) + + vc_sandbox = _resolve_vc_sandbox( + config=config, + sandbox_backends=sandbox_backends, + workspace=workspace, + ) + return build_default_tools( workspace=workspace, git_clone_policy=config.git_clone, - sandbox=sandbox, + sandbox=vc_sandbox, ) diff --git a/src/synthorg/tools/sandbox/__init__.py b/src/synthorg/tools/sandbox/__init__.py index 1a4067371e..9a638e2966 100644 --- a/src/synthorg/tools/sandbox/__init__.py +++ b/src/synthorg/tools/sandbox/__init__.py @@ -4,6 +4,11 @@ from .docker_config import DockerSandboxConfig from .docker_sandbox import DockerSandbox from .errors import SandboxError, SandboxStartError, SandboxTimeoutError +from .factory import ( + build_sandbox_backends, + cleanup_sandbox_backends, + resolve_sandbox_for_category, +) from .protocol import SandboxBackend from .result import SandboxResult from .sandboxing_config import SandboxingConfig @@ -20,4 +25,7 @@ "SandboxingConfig", "SubprocessSandbox", "SubprocessSandboxConfig", + "build_sandbox_backends", + "cleanup_sandbox_backends", + "resolve_sandbox_for_category", ] diff --git a/src/synthorg/tools/sandbox/docker_sandbox.py b/src/synthorg/tools/sandbox/docker_sandbox.py index a875fed201..46663723b9 100644 --- a/src/synthorg/tools/sandbox/docker_sandbox.py +++ b/src/synthorg/tools/sandbox/docker_sandbox.py @@ -13,6 +13,7 @@ import aiodocker import aiodocker.containers +from synthorg.core.types import NotBlankStr from synthorg.observability import get_logger from synthorg.observability.events.docker import ( DOCKER_CLEANUP, @@ -35,8 +36,6 @@ if TYPE_CHECKING: from collections.abc import Mapping - from synthorg.core.types import NotBlankStr - logger = get_logger(__name__) _DEFAULT_CONFIG = DockerSandboxConfig() @@ -645,4 +644,4 @@ async def health_check(self) -> bool: def get_backend_type(self) -> NotBlankStr: """Return ``'docker'``.""" - return "docker" + return NotBlankStr("docker") diff --git a/src/synthorg/tools/sandbox/factory.py b/src/synthorg/tools/sandbox/factory.py new file mode 100644 index 0000000000..36e4170e0b --- /dev/null +++ b/src/synthorg/tools/sandbox/factory.py @@ -0,0 +1,212 @@ +"""Sandbox backend factory -- build and resolve backends from config. + +Provides ``build_sandbox_backends`` to instantiate only the backends +referenced by a ``SandboxingConfig``, ``resolve_sandbox_for_category`` +to look up the correct backend for a tool category, and +``cleanup_sandbox_backends`` to release resources. +""" + +import asyncio +from types import MappingProxyType +from typing import TYPE_CHECKING + +from synthorg.observability import get_logger +from synthorg.observability.events.sandbox import ( + SANDBOX_FACTORY_BUILD_FAILED, + SANDBOX_FACTORY_BUILT, + SANDBOX_FACTORY_CLEANUP, + SANDBOX_FACTORY_CLEANUP_FAILED, + SANDBOX_FACTORY_RESOLVE, + SANDBOX_FACTORY_RESOLVE_FAILED, +) +from synthorg.tools.sandbox.docker_sandbox import DockerSandbox +from synthorg.tools.sandbox.subprocess_sandbox import SubprocessSandbox + +if TYPE_CHECKING: + from collections.abc import Mapping + from pathlib import Path + + from synthorg.core.enums import ToolCategory + from synthorg.tools.sandbox.protocol import SandboxBackend + from synthorg.tools.sandbox.sandboxing_config import SandboxingConfig + +logger = get_logger(__name__) + +_KNOWN_BACKENDS: frozenset[str] = frozenset({"subprocess", "docker"}) + + +def _instantiate_backend( + name: str, + config: SandboxingConfig, + workspace: Path, +) -> SandboxBackend: + """Construct a single sandbox backend by name. + + Logs and re-raises on construction failure. + """ + if name not in _KNOWN_BACKENDS: + msg = f"No constructor for backend {name!r}" + raise ValueError(msg) + + try: + if name == "subprocess": + return SubprocessSandbox( + config=config.subprocess, + workspace=workspace, + ) + return DockerSandbox( + config=config.docker, + workspace=workspace, + ) + except Exception: + logger.error( + SANDBOX_FACTORY_BUILD_FAILED, + backend=name, + workspace=str(workspace), + exc_info=True, + ) + raise + + +def build_sandbox_backends( + *, + config: SandboxingConfig, + workspace: Path, +) -> MappingProxyType[str, SandboxBackend]: + """Build only the backend instances actually referenced by *config*. + + Collects which backend names are needed (the default plus all + override values), then instantiates ``SubprocessSandbox`` and/or + ``DockerSandbox`` with their respective sub-configs. + + Args: + config: Top-level sandboxing configuration. + workspace: Absolute path to the agent workspace root. + + Returns: + A read-only mapping of backend name to backend instance. + Only contains keys for backends that are actually referenced. + + Raises: + ValueError: If *config* references an unrecognized backend + name not in the known set (``subprocess``, ``docker``). + """ + needed: set[str] = {config.default_backend} + needed.update(config.overrides.values()) + + unknown = needed - _KNOWN_BACKENDS + if unknown: + msg = ( + f"Unrecognized sandbox backend name(s): {sorted(unknown)}; " + f"known backends: {sorted(_KNOWN_BACKENDS)}" + ) + logger.error(SANDBOX_FACTORY_BUILD_FAILED, error=msg) + raise ValueError(msg) + + backends: dict[str, SandboxBackend] = { + name: _instantiate_backend(name, config, workspace) for name in sorted(needed) + } + + logger.info( + SANDBOX_FACTORY_BUILT, + backends=sorted(backends.keys()), + default=config.default_backend, + override_count=len(config.overrides), + ) + return MappingProxyType(backends) + + +def resolve_sandbox_for_category( + *, + config: SandboxingConfig, + backends: Mapping[str, SandboxBackend], + category: ToolCategory, +) -> SandboxBackend: + """Look up the correct backend for a tool category. + + Uses ``config.backend_for_category()`` to determine the backend + name, then returns the corresponding instance from *backends*. + + Args: + config: Top-level sandboxing configuration. + backends: Mapping of backend name to backend instance. + category: The tool category to resolve. + + Returns: + The ``SandboxBackend`` instance for the given category. + + Raises: + KeyError: If the resolved backend name is not present in + *backends*. + """ + backend_name = config.backend_for_category(category.value) + try: + backend = backends[backend_name] + except KeyError as exc: + msg = ( + f"Backend {backend_name!r} resolved for category " + f"{category.value!r} not found in backends mapping " + f"(available: {sorted(backends.keys())})" + ) + logger.warning( + SANDBOX_FACTORY_RESOLVE_FAILED, + category=category.value, + backend=backend_name, + error=msg, + ) + raise KeyError(msg) from exc + + logger.debug( + SANDBOX_FACTORY_RESOLVE, + category=category.value, + backend=backend_name, + ) + return backend + + +async def cleanup_sandbox_backends( + *, + backends: Mapping[str, SandboxBackend], +) -> None: + """Clean up all backends by calling ``cleanup()`` on each. + + Errors from individual backends are logged but do not prevent + cleanup of remaining backends. Uses ``asyncio.gather`` with + ``return_exceptions=True`` for best-effort parallel cleanup + that is resilient to task cancellation. + + Args: + backends: Mapping of backend name to backend instance. + """ + + async def _cleanup_one(name: str, backend: SandboxBackend) -> None: + logger.debug(SANDBOX_FACTORY_CLEANUP, backend=name) + try: + await backend.cleanup() + except Exception: + logger.warning( + SANDBOX_FACTORY_CLEANUP_FAILED, + backend=name, + error=f"cleanup failed for backend {name!r}", + exc_info=True, + ) + + # NOTE: intentionally using gather(return_exceptions=True) instead of + # TaskGroup here. TaskGroup cancels all siblings when one task raises + # a BaseException (e.g. CancelledError during shutdown), defeating + # the error-isolation guarantee this function promises. gather keeps + # all tasks running independently. + backend_items = list(backends.items()) + results = await asyncio.gather( + *(_cleanup_one(n, b) for n, b in backend_items), + return_exceptions=True, + ) + # Log BaseException subclasses (CancelledError, KeyboardInterrupt) + # that escaped _cleanup_one's except Exception block + for (name, _), result in zip(backend_items, results, strict=True): + if isinstance(result, BaseException): + logger.error( + SANDBOX_FACTORY_CLEANUP_FAILED, + backend=name, + error=f"unhandled exception during cleanup: {result!r}", + ) diff --git a/tests/integration/tools/test_sandbox_wiring_integration.py b/tests/integration/tools/test_sandbox_wiring_integration.py new file mode 100644 index 0000000000..4ac4197b2f --- /dev/null +++ b/tests/integration/tools/test_sandbox_wiring_integration.py @@ -0,0 +1,135 @@ +"""Integration tests for sandbox backend wiring via YAML config.""" + +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from synthorg.config.loader import load_config_from_string +from synthorg.tools._git_base import _BaseGitTool +from synthorg.tools.factory import build_default_tools_from_config +from synthorg.tools.sandbox.protocol import SandboxBackend + + +@pytest.mark.integration +class TestSandboxWiringIntegration: + """Integration: YAML config -> SandboxingConfig -> factory -> correct backends.""" + + @patch( + "synthorg.tools.sandbox.factory.SubprocessSandbox", + ) + def test_default_yaml_wires_subprocess_to_git_tools( + self, + mock_subprocess_cls: MagicMock, + tmp_path: Path, + ) -> None: + """Default YAML config (no sandboxing section) wires subprocess.""" + mock_instance = MagicMock(spec=SandboxBackend) + mock_subprocess_cls.return_value = mock_instance + + yaml_str = """\ +company_name: test-corp +""" + config = load_config_from_string(yaml_str) + tools = build_default_tools_from_config( + workspace=tmp_path, + config=config, + ) + + git_tools = [t for t in tools if isinstance(t, _BaseGitTool)] + assert len(git_tools) == 6 + for tool in git_tools: + assert tool._sandbox is mock_instance + + @patch( + "synthorg.tools.sandbox.factory.DockerSandbox", + ) + def test_docker_default_yaml_wires_docker_to_git_tools( + self, + mock_docker_cls: MagicMock, + tmp_path: Path, + ) -> None: + """YAML with docker default wires docker backend to git tools.""" + mock_instance = MagicMock(spec=SandboxBackend) + mock_docker_cls.return_value = mock_instance + + yaml_str = """\ +company_name: test-corp +sandboxing: + default_backend: docker +""" + config = load_config_from_string(yaml_str) + tools = build_default_tools_from_config( + workspace=tmp_path, + config=config, + ) + + git_tools = [t for t in tools if isinstance(t, _BaseGitTool)] + assert len(git_tools) == 6 + for tool in git_tools: + assert tool._sandbox is mock_instance + + @patch( + "synthorg.tools.sandbox.factory.DockerSandbox", + ) + @patch( + "synthorg.tools.sandbox.factory.SubprocessSandbox", + ) + def test_per_category_override_yaml( + self, + mock_subprocess_cls: MagicMock, + mock_docker_cls: MagicMock, + tmp_path: Path, + ) -> None: + """YAML with per-category override wires correct backend per category.""" + mock_subprocess = MagicMock(spec=SandboxBackend) + mock_docker = MagicMock(spec=SandboxBackend) + mock_subprocess_cls.return_value = mock_subprocess + mock_docker_cls.return_value = mock_docker + + yaml_str = """\ +company_name: test-corp +sandboxing: + default_backend: subprocess + overrides: + version_control: docker +""" + config = load_config_from_string(yaml_str) + tools = build_default_tools_from_config( + workspace=tmp_path, + config=config, + ) + + # Git tools (version_control) should get docker backend + git_tools = [t for t in tools if isinstance(t, _BaseGitTool)] + assert len(git_tools) == 6 + for tool in git_tools: + assert tool._sandbox is mock_docker + + @patch( + "synthorg.tools.sandbox.factory.SubprocessSandbox", + ) + def test_explicit_subprocess_config_in_yaml( + self, + mock_subprocess_cls: MagicMock, + tmp_path: Path, + ) -> None: + """YAML subprocess config values propagate to SubprocessSandbox.""" + mock_instance = MagicMock(spec=SandboxBackend) + mock_subprocess_cls.return_value = mock_instance + + yaml_str = """\ +company_name: test-corp +sandboxing: + default_backend: subprocess + subprocess: + timeout_seconds: 120 +""" + config = load_config_from_string(yaml_str) + build_default_tools_from_config( + workspace=tmp_path, + config=config, + ) + + call_kwargs = mock_subprocess_cls.call_args.kwargs + assert call_kwargs["config"].timeout_seconds == 120 diff --git a/tests/unit/tools/sandbox/test_sandbox_factory.py b/tests/unit/tools/sandbox/test_sandbox_factory.py new file mode 100644 index 0000000000..df55964ed1 --- /dev/null +++ b/tests/unit/tools/sandbox/test_sandbox_factory.py @@ -0,0 +1,305 @@ +"""Unit tests for sandbox backend factory functions.""" + +from collections.abc import Mapping +from pathlib import Path +from types import MappingProxyType +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from synthorg.core.enums import ToolCategory +from synthorg.observability.events.sandbox import SANDBOX_FACTORY_CLEANUP_FAILED +from synthorg.tools.sandbox.config import SubprocessSandboxConfig +from synthorg.tools.sandbox.docker_config import DockerSandboxConfig +from synthorg.tools.sandbox.factory import ( + build_sandbox_backends, + cleanup_sandbox_backends, + resolve_sandbox_for_category, +) +from synthorg.tools.sandbox.protocol import SandboxBackend +from synthorg.tools.sandbox.sandboxing_config import SandboxingConfig + + +@pytest.mark.unit +class TestBuildSandboxBackends: + """Tests for build_sandbox_backends().""" + + @patch("synthorg.tools.sandbox.factory.SubprocessSandbox") + def test_default_subprocess_builds_only_subprocess( + self, + mock_subprocess_cls: MagicMock, + tmp_path: Path, + ) -> None: + """Default config (subprocess) builds only SubprocessSandbox.""" + config = SandboxingConfig(default_backend="subprocess") + backends = build_sandbox_backends(config=config, workspace=tmp_path) + + assert "subprocess" in backends + assert "docker" not in backends + mock_subprocess_cls.assert_called_once_with( + config=config.subprocess, + workspace=tmp_path, + ) + + @patch("synthorg.tools.sandbox.factory.DockerSandbox") + def test_default_docker_builds_only_docker( + self, + mock_docker_cls: MagicMock, + tmp_path: Path, + ) -> None: + """Docker default builds only DockerSandbox.""" + config = SandboxingConfig(default_backend="docker") + backends = build_sandbox_backends(config=config, workspace=tmp_path) + + assert "docker" in backends + assert "subprocess" not in backends + mock_docker_cls.assert_called_once_with( + config=config.docker, + workspace=tmp_path, + ) + + @patch("synthorg.tools.sandbox.factory.DockerSandbox") + @patch("synthorg.tools.sandbox.factory.SubprocessSandbox") + def test_mixed_overrides_builds_both( + self, + mock_subprocess_cls: MagicMock, + mock_docker_cls: MagicMock, + tmp_path: Path, + ) -> None: + """Default subprocess + code_execution override docker builds both.""" + config = SandboxingConfig( + default_backend="subprocess", + overrides={"code_execution": "docker"}, + ) + backends = build_sandbox_backends(config=config, workspace=tmp_path) + + assert "subprocess" in backends + assert "docker" in backends + mock_subprocess_cls.assert_called_once() + mock_docker_cls.assert_called_once() + + @patch("synthorg.tools.sandbox.factory.SubprocessSandbox") + def test_backends_use_correct_sub_configs( + self, + mock_subprocess_cls: MagicMock, + tmp_path: Path, + ) -> None: + """Backend instances receive their sub-configs from SandboxingConfig.""" + sub_config = SubprocessSandboxConfig(timeout_seconds=120) + config = SandboxingConfig( + default_backend="subprocess", + subprocess=sub_config, + ) + build_sandbox_backends(config=config, workspace=tmp_path) + + mock_subprocess_cls.assert_called_once_with( + config=sub_config, + workspace=tmp_path, + ) + + @patch("synthorg.tools.sandbox.factory.DockerSandbox") + def test_docker_backend_uses_docker_sub_config( + self, + mock_docker_cls: MagicMock, + tmp_path: Path, + ) -> None: + """Docker backend receives docker sub-config.""" + docker_config = DockerSandboxConfig(image="test-image:latest") + config = SandboxingConfig( + default_backend="docker", + docker=docker_config, + ) + build_sandbox_backends(config=config, workspace=tmp_path) + + mock_docker_cls.assert_called_once_with( + config=docker_config, + workspace=tmp_path, + ) + + @patch("synthorg.tools.sandbox.factory.SubprocessSandbox") + def test_only_needed_backends_instantiated( + self, + mock_subprocess_cls: MagicMock, + tmp_path: Path, + ) -> None: + """When all overrides use the same backend, only that one is built.""" + config = SandboxingConfig( + default_backend="subprocess", + overrides={ + "version_control": "subprocess", + "code_execution": "subprocess", + }, + ) + backends = build_sandbox_backends(config=config, workspace=tmp_path) + + assert set(backends.keys()) == {"subprocess"} + mock_subprocess_cls.assert_called_once() + + @patch("synthorg.tools.sandbox.factory.DockerSandbox") + @patch("synthorg.tools.sandbox.factory.SubprocessSandbox") + def test_returns_mapping_proxy_with_correct_keys( + self, + mock_subprocess_cls: MagicMock, + mock_docker_cls: MagicMock, + tmp_path: Path, + ) -> None: + """Return type is a MappingProxyType with correct keys and values.""" + config = SandboxingConfig( + default_backend="subprocess", + overrides={"code_execution": "docker"}, + ) + backends = build_sandbox_backends(config=config, workspace=tmp_path) + + assert isinstance(backends, MappingProxyType) + assert set(backends.keys()) == {"subprocess", "docker"} + assert backends["subprocess"] is mock_subprocess_cls.return_value + assert backends["docker"] is mock_docker_cls.return_value + + +@pytest.mark.unit +class TestResolveSandboxForCategory: + """Tests for resolve_sandbox_for_category().""" + + def test_returns_override_backend_for_overridden_category( + self, + ) -> None: + """Category with override returns the override backend.""" + mock_subprocess = MagicMock(spec=SandboxBackend) + mock_docker = MagicMock(spec=SandboxBackend) + config = SandboxingConfig( + default_backend="subprocess", + overrides={"code_execution": "docker"}, + ) + backends: Mapping[str, SandboxBackend] = { + "subprocess": mock_subprocess, + "docker": mock_docker, + } + + result = resolve_sandbox_for_category( + config=config, + backends=backends, + category=ToolCategory.CODE_EXECUTION, + ) + assert result is mock_docker + + def test_falls_back_to_default_for_non_overridden_category( + self, + ) -> None: + """Category without override falls back to default backend.""" + mock_subprocess = MagicMock(spec=SandboxBackend) + config = SandboxingConfig(default_backend="subprocess") + backends: Mapping[str, SandboxBackend] = { + "subprocess": mock_subprocess, + } + + result = resolve_sandbox_for_category( + config=config, + backends=backends, + category=ToolCategory.VERSION_CONTROL, + ) + assert result is mock_subprocess + + @pytest.mark.parametrize( + "category", + list(ToolCategory), + ids=[c.name for c in ToolCategory], + ) + def test_works_with_all_tool_categories( + self, + category: ToolCategory, + ) -> None: + """Every ToolCategory value resolves without error.""" + mock_backend = MagicMock(spec=SandboxBackend) + config = SandboxingConfig(default_backend="subprocess") + backends: Mapping[str, SandboxBackend] = { + "subprocess": mock_backend, + } + + result = resolve_sandbox_for_category( + config=config, + backends=backends, + category=category, + ) + assert result is mock_backend + + def test_raises_key_error_when_backend_missing(self) -> None: + """KeyError with descriptive message when backend not in map.""" + config = SandboxingConfig( + default_backend="subprocess", + overrides={"version_control": "docker"}, + ) + backends: Mapping[str, SandboxBackend] = { + "subprocess": MagicMock(spec=SandboxBackend), + } + + with pytest.raises( + KeyError, + match=( + r"resolved for category 'version_control'" + r".*available: \['subprocess'\]" + ), + ): + resolve_sandbox_for_category( + config=config, + backends=backends, + category=ToolCategory.VERSION_CONTROL, + ) + + +@pytest.mark.unit +class TestCleanupSandboxBackends: + """Tests for cleanup_sandbox_backends().""" + + async def test_calls_cleanup_on_all_backends(self) -> None: + """cleanup() is called on every backend in the mapping.""" + mock_sub = AsyncMock(spec=SandboxBackend) + mock_docker = AsyncMock(spec=SandboxBackend) + backends: Mapping[str, SandboxBackend] = { + "subprocess": mock_sub, + "docker": mock_docker, + } + + await cleanup_sandbox_backends(backends=backends) + + mock_sub.cleanup.assert_awaited_once() + mock_docker.cleanup.assert_awaited_once() + + async def test_cleanup_empty_mapping(self) -> None: + """No error when cleaning up an empty mapping.""" + await cleanup_sandbox_backends(backends={}) + + async def test_cleanup_single_backend(self) -> None: + """Cleanup works with a single backend.""" + mock_backend = AsyncMock(spec=SandboxBackend) + backends: Mapping[str, SandboxBackend] = { + "subprocess": mock_backend, + } + + await cleanup_sandbox_backends(backends=backends) + + mock_backend.cleanup.assert_awaited_once() + + async def test_cleanup_continues_on_error(self) -> None: + """All backends are cleaned up even when one raises.""" + failing = AsyncMock(spec=SandboxBackend) + failing.cleanup.side_effect = RuntimeError("container gone") + healthy = AsyncMock(spec=SandboxBackend) + + backends: Mapping[str, SandboxBackend] = { + "broken": failing, + "ok": healthy, + } + + with patch( + "synthorg.tools.sandbox.factory.logger", + ) as mock_logger: + # Should not raise -- errors are logged and swallowed + await cleanup_sandbox_backends(backends=backends) + + failing.cleanup.assert_awaited_once() + healthy.cleanup.assert_awaited_once() + # Verify the error was actually logged + mock_logger.warning.assert_called_once() + call_args = mock_logger.warning.call_args + assert call_args[0][0] == SANDBOX_FACTORY_CLEANUP_FAILED + assert call_args[1]["backend"] == "broken" diff --git a/tests/unit/tools/test_factory_sandbox_wiring.py b/tests/unit/tools/test_factory_sandbox_wiring.py new file mode 100644 index 0000000000..5b9b8f0ba3 --- /dev/null +++ b/tests/unit/tools/test_factory_sandbox_wiring.py @@ -0,0 +1,228 @@ +"""Unit tests for per-category sandbox wiring in build_default_tools_from_config.""" + +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from synthorg.config.schema import RootConfig +from synthorg.tools._git_base import _BaseGitTool +from synthorg.tools.base import BaseTool +from synthorg.tools.factory import build_default_tools_from_config +from synthorg.tools.file_system import BaseFileSystemTool +from synthorg.tools.sandbox.protocol import SandboxBackend +from synthorg.tools.sandbox.sandboxing_config import SandboxingConfig + +_GIT_TOOL_NAMES: frozenset[str] = frozenset( + { + "git_status", + "git_log", + "git_diff", + "git_branch", + "git_commit", + "git_clone", + } +) + +_FS_TOOL_NAMES: frozenset[str] = frozenset( + { + "read_file", + "write_file", + "edit_file", + "list_directory", + "delete_file", + } +) + +_EXPECTED_TOOL_COUNT: int = len(_GIT_TOOL_NAMES) + len(_FS_TOOL_NAMES) + + +def _git_tools( + tools: tuple[BaseTool, ...], +) -> list[_BaseGitTool]: + """Filter and return git tools, asserting expected count.""" + matched = [t for t in tools if t.name in _GIT_TOOL_NAMES] + assert len(matched) == len(_GIT_TOOL_NAMES) + return matched # type: ignore[return-value] # narrowed by name membership check + + +@pytest.mark.unit +class TestFactorySandboxWiring: + """Tests for per-category sandbox resolution in build_default_tools_from_config.""" + + @patch( + "synthorg.tools.sandbox.factory.SubprocessSandbox", + ) + def test_default_config_gives_git_tools_subprocess_sandbox( + self, + mock_subprocess_cls: MagicMock, + tmp_path: Path, + ) -> None: + """Default SandboxingConfig gives all git tools a subprocess sandbox.""" + mock_instance = MagicMock(spec=SandboxBackend) + mock_subprocess_cls.return_value = mock_instance + + config = RootConfig(company_name="test-corp") + tools = build_default_tools_from_config( + workspace=tmp_path, + config=config, + ) + + for tool in _git_tools(tools): + assert tool._sandbox is mock_instance + + @patch( + "synthorg.tools.sandbox.factory.DockerSandbox", + ) + def test_docker_default_gives_git_tools_docker_sandbox( + self, + mock_docker_cls: MagicMock, + tmp_path: Path, + ) -> None: + """Docker default SandboxingConfig gives all git tools a docker sandbox.""" + mock_instance = MagicMock(spec=SandboxBackend) + mock_docker_cls.return_value = mock_instance + + sandboxing = SandboxingConfig(default_backend="docker") + config = RootConfig( + company_name="test-corp", + sandboxing=sandboxing, + ) + tools = build_default_tools_from_config( + workspace=tmp_path, + config=config, + ) + + for tool in _git_tools(tools): + assert tool._sandbox is mock_instance + + def test_explicit_sandbox_param_takes_precedence( + self, + tmp_path: Path, + ) -> None: + """Explicit sandbox= param overrides config-based resolution.""" + explicit_sandbox = MagicMock(spec=SandboxBackend) + config = RootConfig(company_name="test-corp") + + tools = build_default_tools_from_config( + workspace=tmp_path, + config=config, + sandbox=explicit_sandbox, + ) + + for tool in _git_tools(tools): + assert tool._sandbox is explicit_sandbox + + def test_explicit_sandbox_overrides_sandbox_backends( + self, + tmp_path: Path, + ) -> None: + """sandbox= takes precedence even when sandbox_backends= also provided.""" + explicit = MagicMock(spec=SandboxBackend) + backends_map = {"subprocess": MagicMock(spec=SandboxBackend)} + config = RootConfig(company_name="test-corp") + + tools = build_default_tools_from_config( + workspace=tmp_path, + config=config, + sandbox=explicit, + sandbox_backends=backends_map, + ) + + for tool in _git_tools(tools): + assert tool._sandbox is explicit + + @patch( + "synthorg.tools.sandbox.factory.SubprocessSandbox", + ) + def test_explicit_sandbox_backends_map_used( + self, + mock_subprocess_cls: MagicMock, + tmp_path: Path, + ) -> None: + """Explicit sandbox_backends= map is used for resolution.""" + mock_backend = MagicMock(spec=SandboxBackend) + config = RootConfig(company_name="test-corp") + + tools = build_default_tools_from_config( + workspace=tmp_path, + config=config, + sandbox_backends={"subprocess": mock_backend}, + ) + + for tool in _git_tools(tools): + assert tool._sandbox is mock_backend + # Should NOT auto-build backends when map is provided + mock_subprocess_cls.assert_not_called() + + @patch( + "synthorg.tools.sandbox.factory.SubprocessSandbox", + ) + def test_auto_build_from_config_when_neither_provided( + self, + mock_subprocess_cls: MagicMock, + tmp_path: Path, + ) -> None: + """Auto-builds backends from config when no sandbox args provided.""" + mock_instance = MagicMock(spec=SandboxBackend) + mock_subprocess_cls.return_value = mock_instance + + config = RootConfig(company_name="test-corp") + tools = build_default_tools_from_config( + workspace=tmp_path, + config=config, + ) + + # Should have built subprocess backend from config + mock_subprocess_cls.assert_called_once() + + for tool in _git_tools(tools): + assert tool._sandbox is mock_instance + + def test_file_system_tools_unaffected( + self, + tmp_path: Path, + ) -> None: + """File system tools have no sandbox attribute regardless of config.""" + mock_sandbox = MagicMock(spec=SandboxBackend) + config = RootConfig(company_name="test-corp") + + tools = build_default_tools_from_config( + workspace=tmp_path, + config=config, + sandbox=mock_sandbox, + ) + + fs_tools = [t for t in tools if t.name in _FS_TOOL_NAMES] + assert len(fs_tools) == len(_FS_TOOL_NAMES) + for tool in fs_tools: + assert isinstance(tool, BaseFileSystemTool) + assert not hasattr(tool, "_sandbox") + + @patch( + "synthorg.tools.sandbox.factory.SubprocessSandbox", + ) + def test_tool_count_unchanged_with_auto_build( + self, + mock_subprocess_cls: MagicMock, + tmp_path: Path, + ) -> None: + """Sandbox wiring doesn't change the number of tools returned.""" + mock_subprocess_cls.return_value = MagicMock(spec=SandboxBackend) + config = RootConfig(company_name="test-corp") + + # Explicit sandbox path + tools_explicit = build_default_tools_from_config( + workspace=tmp_path, + config=config, + sandbox=MagicMock(spec=SandboxBackend), + ) + + # Auto-build from config path + tools_auto = build_default_tools_from_config( + workspace=tmp_path, + config=config, + ) + + assert len(tools_explicit) == _EXPECTED_TOOL_COUNT + assert len(tools_auto) == _EXPECTED_TOOL_COUNT