diff --git a/.claude/skills/aurelio-review-pr/SKILL.md b/.claude/skills/aurelio-review-pr/SKILL.md index 0066693ecb..abe7d5e76a 100644 --- a/.claude/skills/aurelio-review-pr/SKILL.md +++ b/.claude/skills/aurelio-review-pr/SKILL.md @@ -317,11 +317,11 @@ Show the user the complete table, organized by severity (Critical first, Minor l - Total count of items - Count by source (each agent + each external reviewer) -- Any items you recommend skipping (with reasoning) +**Default behavior: implement ALL valid findings** — including pre-existing issues found in surrounding code, suggestions, and anything correctly identified by agents or external reviewers. Do NOT skip items just because they are "pre-existing" or "out of scope" — if a reviewer found a valid issue in code touched by (or adjacent to) this PR, fix it now. Then ask the user using AskUserQuestion with options like: -- "Implement all" (Recommended) +- "Implement all (Recommended)" — this is the default - "Let me review the list first" - "Skip some items" @@ -411,4 +411,4 @@ Report what was done: - If two sources contradict each other, flag it and ask the user. - Do NOT use `--no-verify` or `--amend` for commits. - External feedback fetch failures are non-fatal — retry once, then proceed with local findings if still failing. Mark external coverage as partial in the triage table. -- **Fix everything in the current PR — never defer.** Every valid recommendation must be implemented in this PR regardless of size. No creating GitHub issues for "too large" items, no deferring to future PRs, no marking things as out of scope. If a reviewer flags it and it's valid, fix it now — docstrings, type hints, refactors, all of it. +- **Fix everything valid — never defer, never skip.** Every valid recommendation must be implemented — including pre-existing issues, suggestions, and findings in surrounding code. No creating GitHub issues for "too large" items, no deferring to future PRs, no marking things as "out of scope". If a reviewer flags it and it's valid, fix it now. diff --git a/.claude/skills/pre-pr-review/SKILL.md b/.claude/skills/pre-pr-review/SKILL.md index a822c9600d..55fd3129a6 100644 --- a/.claude/skills/pre-pr-review/SKILL.md +++ b/.claude/skills/pre-pr-review/SKILL.md @@ -290,10 +290,11 @@ Sort by severity (Critical first, Minor last). Show the consolidated table with: - Total count of items - Count by source agent -- Any items recommended to skip (with reasoning) + +**Default behavior: implement ALL valid findings** — including pre-existing issues found in surrounding code, suggestions, and anything the agents correctly identified. Do NOT skip items just because they are "pre-existing" or "out of scope" — if an agent found a valid issue in code touched by (or adjacent to) this PR, fix it now. Ask the user via AskUserQuestion: -- "Implement all" (Recommended) +- "Implement all (Recommended)" — this is the default - "Let me review the list first" - "Skip some items" @@ -395,4 +396,4 @@ Report: - If two agents contradict each other, flag it and ask the user. - Do NOT use `--no-verify` or `--amend` for commits. - Agent failures are non-fatal — proceed with available findings, report failed agents. -- **Fix everything that's approved — never defer.** Every valid recommendation must be implemented. No creating GitHub issues for "too large" items, no deferring to future PRs. +- **Fix everything valid — never defer, never skip.** Every valid recommendation must be implemented — including pre-existing issues, suggestions, and findings in surrounding code. No creating GitHub issues for "too large" items, no deferring to future PRs, no marking things as "out of scope". diff --git a/CLAUDE.md b/CLAUDE.md index d285883809..efa3ff33cc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -129,6 +129,7 @@ src/ai_company/ - For trivial/docs-only changes: `/pre-pr-review quick` skips agents but still runs automated checks - After the PR exists, use `/aurelio-review-pr` to handle external reviewer feedback - The `/commit-push-pr` command is effectively blocked (it calls `gh pr create` internally) +- **Fix everything valid — never skip**: When review agents find valid issues (including pre-existing issues in surrounding code, suggestions, and findings adjacent to the PR's changes), fix them all. No deferring, no "out of scope" skipping. ## CI diff --git a/DESIGN_SPEC.md b/DESIGN_SPEC.md index 94ad50e812..6c4e5ea8cd 100644 --- a/DESIGN_SPEC.md +++ b/DESIGN_SPEC.md @@ -1682,7 +1682,7 @@ When the LLM requests multiple tool calls in a single turn, `ToolInvoker.invoke_ The `ToolPermissionChecker` resolves permissions using a priority-based system: denied list (highest) → allowed list → access-level categories → deny (default). `AgentEngine._make_tool_invoker()` creates a permission-aware invoker from the agent's `ToolPermissions` at the start of each `run()` call. Note: M3 implements category-level gating only; the granular sub-constraints described in §11.2 (workspace scope, network mode) are planned for when sandboxing is implemented. -> **M3 implementation note — Built-in git tools:** Six workspace-scoped git tools are implemented in `tools/git_tools.py` with a shared `_BaseGitTool` base class in `tools/_git_base.py`: `GitStatusTool`, `GitLogTool`, `GitDiffTool`, `GitBranchTool`, `GitCommitTool`, and `GitCloneTool`. The base class enforces workspace boundary security (path traversal prevention via `resolve()` + `relative_to()`) and provides a common `_run_git()` helper using `asyncio.create_subprocess_exec` (never `shell=True`). Security hardening includes: `GIT_TERMINAL_PROMPT=0` to prevent credential prompts, `GIT_CONFIG_NOSYSTEM=1`, `GIT_CONFIG_GLOBAL=os.devnull`, and `GIT_PROTOCOL_FROM_USER=0` to restrict config/protocol attack surfaces, rejection of flag-like ref/branch values (starting with `-`), URL scheme validation on clone (only `https://`, `ssh://`, `git://`, and SCP-like syntax — plain `http://` rejected for security) with `--` separator before positional URL argument, and clone URLs starting with `-` are rejected. All tools return `ToolExecutionResult` for errors rather than raising exceptions. When a `SandboxBackend` is injected, `_run_git()` delegates subprocess management to the sandbox via `_run_git_sandboxed()` — the sandbox handles environment filtering and workspace-scoped cwd enforcement, while `_validate_path` independently enforces workspace boundaries for git path arguments. Git hardening env vars are passed as `env_overrides` to the sandbox, and `SandboxResult` is converted to `ToolExecutionResult` via `_sandbox_result_to_execution_result`. Without a sandbox, the direct-subprocess path is used (backward compatible). **Future:** Consider adding host/IP allowlisting for clone URLs to prevent SSRF against internal networks (loopback, link-local, private ranges). +> **M3 implementation note — Built-in git tools:** Six workspace-scoped git tools are implemented in `tools/git_tools.py` with a shared `_BaseGitTool` base class in `tools/_git_base.py`: `GitStatusTool`, `GitLogTool`, `GitDiffTool`, `GitBranchTool`, `GitCommitTool`, and `GitCloneTool`. The base class enforces workspace boundary security (path traversal prevention via `resolve()` + `relative_to()`) and provides a common `_run_git()` helper using `asyncio.create_subprocess_exec` (never `shell=True`). Security hardening includes: `GIT_TERMINAL_PROMPT=0` to prevent credential prompts, `GIT_CONFIG_NOSYSTEM=1`, `GIT_CONFIG_GLOBAL=os.devnull`, and `GIT_PROTOCOL_FROM_USER=0` to restrict config/protocol attack surfaces, rejection of flag-like argument values (starting with `-`) for refs, branch names, author filters, date strings, and other git arguments, URL scheme validation on clone (only `https://`, `ssh://`, `git://`, and SCP-like syntax — plain `http://` rejected for security) with `--` separator before positional URL argument, and clone URLs starting with `-` are rejected. All tools return `ToolExecutionResult` for errors rather than raising exceptions. When a `SandboxBackend` is injected, `_run_git()` delegates subprocess management to the sandbox via `_run_git_sandboxed()` — the sandbox handles environment filtering and workspace-scoped cwd enforcement, while `_validate_path` independently enforces workspace boundaries for git path arguments. Git hardening env vars are passed as `env_overrides` to the sandbox, and `SandboxResult` is converted to `ToolExecutionResult` via `_sandbox_result_to_execution_result`. Without a sandbox, the direct-subprocess path is used (backward compatible). Both paths explicitly close the subprocess transport on Windows (via `tools/_process_cleanup.py`) to prevent `ResourceWarning` on `ProactorEventLoop`. **Future:** Consider adding host/IP allowlisting for clone URLs to prevent SSRF against internal networks (loopback, link-local, private ranges). ### 11.1.2 Tool Sandboxing @@ -1694,7 +1694,7 @@ Tool execution requires safety boundaries proportional to the risk of each tool | Backend | Isolation | Latency | Dependencies | Status | |---------|-----------|---------|--------------|--------| -| `SubprocessSandbox` | Process-level: env filtering (allowlist + denylist), restricted PATH, workspace-scoped cwd, timeout + process-group kill, library injection var blocking | ~ms | None | **Implemented** | +| `SubprocessSandbox` | Process-level: env filtering (allowlist + denylist), restricted PATH (configurable via `extra_safe_path_prefixes`), workspace-scoped cwd, timeout + process-group kill, library injection var blocking, explicit transport cleanup on Windows | ~ms | None | **Implemented** | | `DockerSandbox` | Container-level: ephemeral container, mounted workspace, no network, resource limits (CPU/memory/time) | ~1-2s cold start | Docker | Planned | | `K8sSandbox` | Pod-level: per-agent containers, namespace isolation, resource quotas, network policies | ~2-5s | Kubernetes | Future | @@ -2398,6 +2398,7 @@ ai-company/ │ │ │ ├── read_file.py # ReadFileTool │ │ │ └── write_file.py # WriteFileTool │ │ ├── _git_base.py # Base class for git tools (workspace, subprocess, sandbox integration) +│ │ ├── _process_cleanup.py # Subprocess transport cleanup utility (Windows ResourceWarning prevention) │ │ ├── git_tools.py # Git operations — 6 built-in tools (sandbox-aware) │ │ ├── code_runner.py # Code execution (M3) │ │ ├── web_tools.py # HTTP, search (M3) diff --git a/README.md b/README.md index 3c82b76418..152eb36a81 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,12 @@ AI Company lets you spin up a virtual organization staffed entirely by AI agents - **Vue 3** for web dashboard (planned) - **SQLite** → PostgreSQL for data persistence (planned) +## System Requirements + +- **Python 3.14+** +- **uv** — package manager ([install](https://docs.astral.sh/uv/getting-started/installation/)) +- **Git 2.x+** — required at runtime for built-in git tools (subprocess-based, not a Python binding) + ## Getting Started ```bash diff --git a/src/ai_company/engine/shutdown.py b/src/ai_company/engine/shutdown.py index 023f2896e1..2b1168c355 100644 --- a/src/ai_company/engine/shutdown.py +++ b/src/ai_company/engine/shutdown.py @@ -14,11 +14,9 @@ import signal import sys import time +import types # noqa: TC003 — used in runtime-visible annotation from collections.abc import Callable, Coroutine, Mapping, Sequence -from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable - -if TYPE_CHECKING: - import types +from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel, ConfigDict, Field @@ -26,6 +24,7 @@ from ai_company.observability import get_logger from ai_company.observability.events.execution import ( EXECUTION_SHUTDOWN_CLEANUP, + EXECUTION_SHUTDOWN_CLEANUP_FAILED, EXECUTION_SHUTDOWN_CLEANUP_TIMEOUT, EXECUTION_SHUTDOWN_COMPLETE, EXECUTION_SHUTDOWN_FORCE_CANCEL, @@ -243,13 +242,41 @@ async def _wait_and_cancel( pending, timeout=self._CANCEL_PROPAGATION_TIMEOUT, ) - for task in cancel_done: - if not task.cancelled(): - with contextlib.suppress(Exception): - task.exception() + self._log_post_cancel_exceptions(cancel_done) return tasks_completed, len(pending) + def _log_post_cancel_exceptions( + self, + tasks: set[asyncio.Task[Any]], + ) -> None: + """Retrieve and log exceptions from post-cancel tasks. + + Retrieving the exception prevents asyncio's "Task exception was + never retrieved" warning. Non-cancelled tasks with exceptions + are logged at DEBUG. + """ + for task in tasks: + if task.cancelled(): + continue + try: + exc = task.exception() + except asyncio.InvalidStateError: + logger.debug( + EXECUTION_SHUTDOWN_TASK_ERROR, + error="Failed to inspect post-cancel task: InvalidStateError", + task_name=task.get_name(), + ) + else: + if exc is not None: + logger.debug( + EXECUTION_SHUTDOWN_TASK_ERROR, + error=( + f"Post-cancel task exception: {type(exc).__name__}: {exc}" + ), + task_name=task.get_name(), + ) + async def _run_cleanup( self, callbacks: Sequence[CleanupCallback], @@ -279,10 +306,9 @@ async def _run_all() -> None: except Exception: all_succeeded = False logger.exception( - EXECUTION_SHUTDOWN_CLEANUP, + EXECUTION_SHUTDOWN_CLEANUP_FAILED, callback_index=i, callback_count=len(callbacks), - error="Cleanup callback failed", ) try: @@ -358,8 +384,12 @@ def _handle_signal(self, sig: signal.Signals) -> None: logger.exception( EXECUTION_SHUTDOWN_SIGNAL, signal=sig.name, - error="request_shutdown() raised in signal handler", + error="request_shutdown() raised — falling back to loop.stop()", ) + # If request_shutdown() itself fails, stop the event loop as + # a last resort to avoid a process that ignores signals. + with contextlib.suppress(Exception): + asyncio.get_running_loop().stop() def _handle_signal_threadsafe( self, @@ -387,17 +417,28 @@ def _on_loop() -> None: logger.exception( EXECUTION_SHUTDOWN_SIGNAL, signal=sig_name, - error="request_shutdown() raised in signal handler", + error="request_shutdown() raised — falling back to loop.stop()", ) + with contextlib.suppress(Exception): + asyncio.get_running_loop().stop() try: loop = asyncio.get_running_loop() loop.call_soon_threadsafe(_on_loop) except RuntimeError: # No running event loop — call directly (best-effort). - # Cannot log safely without a loop, so suppress all errors. - with contextlib.suppress(Exception): + # Cannot use structlog (acquires locks) so fall back to + # stderr for last-resort visibility. + try: self._strategy.request_shutdown() + except Exception: + try: + sys.stderr.write( + f"[shutdown] request_shutdown() failed for signal {sig_name}\n" + ) + sys.stderr.flush() + except Exception: # noqa: S110 + pass def register_task( self, diff --git a/src/ai_company/observability/events/execution.py b/src/ai_company/observability/events/execution.py index d4f35be7e5..e1855b2a25 100644 --- a/src/ai_company/observability/events/execution.py +++ b/src/ai_company/observability/events/execution.py @@ -42,6 +42,7 @@ EXECUTION_SHUTDOWN_GRACE_START: Final[str] = "execution.shutdown.grace_start" EXECUTION_SHUTDOWN_FORCE_CANCEL: Final[str] = "execution.shutdown.force_cancel" EXECUTION_SHUTDOWN_CLEANUP: Final[str] = "execution.shutdown.cleanup" +EXECUTION_SHUTDOWN_CLEANUP_FAILED: Final[str] = "execution.shutdown.cleanup.failed" EXECUTION_SHUTDOWN_CLEANUP_TIMEOUT: Final[str] = "execution.shutdown.cleanup.timeout" EXECUTION_SHUTDOWN_COMPLETE: Final[str] = "execution.shutdown.complete" EXECUTION_LOOP_SHUTDOWN: Final[str] = "execution.loop.shutdown" diff --git a/src/ai_company/observability/events/sandbox.py b/src/ai_company/observability/events/sandbox.py index 8c99fe722e..ea07fd49a6 100644 --- a/src/ai_company/observability/events/sandbox.py +++ b/src/ai_company/observability/events/sandbox.py @@ -13,3 +13,4 @@ SANDBOX_PATH_FALLBACK: Final[str] = "sandbox.path.fallback" SANDBOX_HEALTH_CHECK: Final[str] = "sandbox.health_check" SANDBOX_KILL_FAILED: Final[str] = "sandbox.kill.failed" +SANDBOX_KILL_FALLBACK: Final[str] = "sandbox.kill.fallback" diff --git a/src/ai_company/observability/events/tool.py b/src/ai_company/observability/events/tool.py index fc73b99805..37c47b5b32 100644 --- a/src/ai_company/observability/events/tool.py +++ b/src/ai_company/observability/events/tool.py @@ -39,3 +39,8 @@ TOOL_FS_PARENT_NOT_FOUND: Final[str] = "tool.fs.parent_not_found" TOOL_FS_GLOB_REJECTED: Final[str] = "tool.fs.glob_rejected" TOOL_FS_NOOP: Final[str] = "tool.fs.noop" + +# ── Subprocess utility events ─────────────────────────────────── +TOOL_SUBPROCESS_TRANSPORT_CLOSE_FAILED: Final[str] = ( + "tool.subprocess.transport_close_failed" +) diff --git a/src/ai_company/tools/_git_base.py b/src/ai_company/tools/_git_base.py index e7aaf80e50..9f223f2c81 100644 --- a/src/ai_company/tools/_git_base.py +++ b/src/ai_company/tools/_git_base.py @@ -37,6 +37,7 @@ GIT_REF_INJECTION_BLOCKED, GIT_WORKSPACE_VIOLATION, ) +from ai_company.tools._process_cleanup import close_subprocess_transport from ai_company.tools.base import BaseTool, ToolExecutionResult from ai_company.tools.sandbox.errors import SandboxError @@ -71,11 +72,27 @@ ) +_CONTROL_CHAR_RE = re.compile(r"[\x00-\x1f\x7f]+") +_MAX_STDERR_FRAGMENT: Final[int] = 500 + + def _sanitize_command(args: list[str]) -> list[str]: """Redact embedded credentials from git command args for logging.""" return [_CREDENTIAL_RE.sub(r"\1***@", a) for a in args] +def _sanitize_stderr(raw: str) -> str: + """Replace control characters, redact credentials, and truncate. + + All control characters (including newlines, tabs, and carriage + returns) are collapsed into single spaces to prevent log injection + and LLM prompt injection via stderr content. Embedded credentials + (``https://user:token@host``) are redacted before truncation. + """ + sanitized = _CONTROL_CHAR_RE.sub(" ", raw).strip() + return _CREDENTIAL_RE.sub(r"\1***@", sanitized)[:_MAX_STDERR_FRAGMENT] + + class _BaseGitTool(BaseTool, ABC): """Shared base for all git tools. @@ -185,7 +202,7 @@ def _check_paths(self, paths: list[str]) -> ToolExecutionResult | None: ) return None - def _check_ref( + def _check_git_arg( self, value: str, *, @@ -193,8 +210,11 @@ def _check_ref( ) -> ToolExecutionResult | None: """Reject values starting with ``-`` to prevent flag injection. + Used for refs, branch names, author filters, date strings, and + any other git argument that must not be interpreted as a flag. + Args: - value: The ref or branch name string to validate. + value: The argument string to validate. param: Parameter name for the error message. Returns: @@ -305,8 +325,15 @@ async def _await_git_process( except TimeoutError: with contextlib.suppress(ProcessLookupError): proc.kill() + stderr_fragment = "" try: - await asyncio.wait_for(proc.communicate(), timeout=5.0) + _, raw_stderr = await asyncio.wait_for( + proc.communicate(), + timeout=5.0, + ) + raw = raw_stderr.decode("utf-8", errors="replace").strip() + # Sanitize: strip control chars and truncate for safety. + stderr_fragment = _sanitize_stderr(raw) except TimeoutError: logger.warning( GIT_COMMAND_FAILED, @@ -317,9 +344,13 @@ async def _await_git_process( GIT_COMMAND_TIMEOUT, command=_sanitize_command(["git", *args]), deadline=deadline, + stderr_fragment=stderr_fragment, ) + msg = f"Git command timed out after {deadline}s" + if stderr_fragment: + msg += f": {stderr_fragment}" return ToolExecutionResult( - content=f"Git command timed out after {deadline}s", + content=msg, is_error=True, ) @@ -385,13 +416,20 @@ def _sandbox_result_to_execution_result( A ``ToolExecutionResult`` with the appropriate content. """ if result.timed_out: + stderr_fragment = ( + _sanitize_stderr(result.stderr.strip()) if result.stderr else "" + ) logger.warning( GIT_COMMAND_TIMEOUT, command=_sanitize_command(["git", *args]), deadline=deadline, + stderr_fragment=stderr_fragment, ) + msg = f"Git command timed out after {deadline}s" + if stderr_fragment: + msg += f": {stderr_fragment}" return ToolExecutionResult( - content=result.stderr or "Git command timed out", + content=msg, is_error=True, ) if result.returncode != 0: @@ -498,18 +536,21 @@ async def _run_git_direct( if isinstance(proc_or_err, ToolExecutionResult): return proc_or_err - output_or_err = await self._await_git_process( - proc_or_err, - args, - deadline=deadline, - ) - if isinstance(output_or_err, ToolExecutionResult): - return output_or_err - - stdout_bytes, stderr_bytes = output_or_err - return self._process_git_output( - args, - proc_or_err.returncode, - stdout_bytes, - stderr_bytes, - ) + try: + output_or_err = await self._await_git_process( + proc_or_err, + args, + deadline=deadline, + ) + if isinstance(output_or_err, ToolExecutionResult): + return output_or_err + + stdout_bytes, stderr_bytes = output_or_err + return self._process_git_output( + args, + proc_or_err.returncode, + stdout_bytes, + stderr_bytes, + ) + finally: + close_subprocess_transport(proc_or_err) diff --git a/src/ai_company/tools/_process_cleanup.py b/src/ai_company/tools/_process_cleanup.py new file mode 100644 index 0000000000..2deca61985 --- /dev/null +++ b/src/ai_company/tools/_process_cleanup.py @@ -0,0 +1,44 @@ +"""Shared subprocess transport cleanup utility. + +Provides a safe helper for closing asyncio subprocess transports +to prevent ``ResourceWarning`` on Windows with ``ProactorEventLoop``. +""" + +import asyncio # noqa: TC003 — used in runtime-visible annotation + +from ai_company.observability import get_logger +from ai_company.observability.events.tool import ( + TOOL_SUBPROCESS_TRANSPORT_CLOSE_FAILED, +) + +logger = get_logger(__name__) + + +def close_subprocess_transport(proc: asyncio.subprocess.Process) -> None: + """Close subprocess transport to prevent ResourceWarning on Windows. + + On Windows with ProactorEventLoop, pipe transports may not be + closed promptly after kill+communicate, causing ResourceWarning + at GC time. Explicitly closing the transport avoids this. + + Uses ``getattr`` to access the CPython-internal ``_transport`` + attribute — if the attribute is absent (different runtime or + future CPython version), this is a no-op. Exceptions from + ``close()`` and ``is_closing()`` are logged and suppressed so + cleanup never masks the primary result. + + Args: + proc: The subprocess whose transport should be closed. + """ + transport = getattr(proc, "_transport", None) + if transport is None: + return + try: + if transport.is_closing(): + return + transport.close() + except Exception: + logger.debug( + TOOL_SUBPROCESS_TRANSPORT_CLOSE_FAILED, + exc_info=True, + ) diff --git a/src/ai_company/tools/file_system/list_directory.py b/src/ai_company/tools/file_system/list_directory.py index 01b393d313..1f91d5a69c 100644 --- a/src/ai_company/tools/file_system/list_directory.py +++ b/src/ai_company/tools/file_system/list_directory.py @@ -71,12 +71,14 @@ def _list_sync( pattern: str | None, *, recursive: bool, -) -> list[str]: +) -> tuple[list[str], bool]: """Collect directory entries synchronously. Returns: - Formatted strings with type prefixes, capped at - ``MAX_ENTRIES + 1`` to allow truncation detection. + A tuple of ``(lines, raw_capped)`` where *lines* are + formatted strings with type prefixes and *raw_capped* is + ``True`` when the raw scan hit the ``MAX_ENTRIES`` limit + (meaning the directory may contain more entries). """ glob_pattern = pattern or "*" if recursive: @@ -87,9 +89,10 @@ def _list_sync( raw_iter = resolved.iterdir() entries = sorted(itertools.islice(raw_iter, MAX_ENTRIES + 1)) + raw_capped = len(entries) > MAX_ENTRIES lines: list[str] = [] - for entry in entries: + for entry in entries[:MAX_ENTRIES]: try: line = _classify_entry( entry, @@ -107,7 +110,7 @@ def _list_sync( ) lines.append(f"[ERROR] {entry.name} ({exc})") - return lines + return lines, raw_capped class ListDirectoryTool(BaseFileSystemTool): @@ -229,28 +232,35 @@ def _resolve_and_check_dir( def _format_listing_result( user_path: str, lines: list[str], + *, + raw_capped: bool, ) -> tuple[str, dict[str, Any]]: - """Build output text and metadata from listing lines.""" + """Build output text and metadata from listing lines. + + Args: + user_path: The user-supplied directory path. + lines: Classified entry lines from ``_list_sync``. + raw_capped: Whether the raw filesystem scan hit the + ``MAX_ENTRIES`` limit (directory may contain more). + """ total = len(lines) - truncated = total > MAX_ENTRIES - if truncated: - lines = lines[:MAX_ENTRIES] dir_count = sum(1 for ln in lines if ln.startswith("[DIR]")) file_count = sum(1 for ln in lines if ln.startswith("[FILE]")) if not lines: output = f"Directory is empty: {user_path}" else: output = "\n".join(lines) - if truncated: + if raw_capped: output += ( - f"\n\n[Truncated: showing first {MAX_ENTRIES} of {total} entries]" + f"\n\n[Truncated: showing {total} entries;" + " directory may contain more]" ) metadata = { "path": user_path, "total_entries": total, "directories": dir_count, "files": file_count, - "truncated": truncated, + "truncated": raw_capped, } return output, metadata @@ -280,7 +290,7 @@ async def execute( return resolved_or_err try: - lines = await asyncio.to_thread( + lines, raw_capped = await asyncio.to_thread( _list_sync, resolved_or_err, self.workspace_root, @@ -304,7 +314,11 @@ async def execute( is_error=True, ) - output, metadata = self._format_listing_result(user_path, lines) + output, metadata = self._format_listing_result( + user_path, + lines, + raw_capped=raw_capped, + ) logger.info( TOOL_FS_LIST, path=user_path, diff --git a/src/ai_company/tools/git_tools.py b/src/ai_company/tools/git_tools.py index caabcfd9cb..1c2a28c3ae 100644 --- a/src/ai_company/tools/git_tools.py +++ b/src/ai_company/tools/git_tools.py @@ -215,7 +215,7 @@ def _build_filter_args( ("until", "--until"), ): if value := arguments.get(param): - if err := self._check_ref(value, param=param): + if err := self._check_git_arg(value, param=param): return err filter_args.append(f"{flag}={value}") return filter_args @@ -249,7 +249,7 @@ async def execute( args.extend(filter_args) if ref := arguments.get("ref"): - if err := self._check_ref(ref, param="ref"): + if err := self._check_git_arg(ref, param="ref"): return err args.append(ref) @@ -351,7 +351,7 @@ async def execute( # noqa: C901 args.append("--stat") if ref1 := arguments.get("ref1"): - if err := self._check_ref(ref1, param="ref1"): + if err := self._check_git_arg(ref1, param="ref1"): return err args.append(ref1) if ref2 := arguments.get("ref2"): @@ -360,7 +360,7 @@ async def execute( # noqa: C901 content="ref2 requires ref1 to be specified", is_error=True, ) - if err := self._check_ref(ref2, param="ref2"): + if err := self._check_git_arg(ref2, param="ref2"): return err args.append(ref2) @@ -462,7 +462,7 @@ async def _create_branch( """Create a branch, optionally from a start point.""" args = ["branch", name] if start_point := arguments.get("start_point"): - if err := self._check_ref(start_point, param="start_point"): + if err := self._check_git_arg(start_point, param="start_point"): return err args.append(start_point) return await self._run_git(args) @@ -495,7 +495,7 @@ async def execute( # noqa: PLR0911 # Narrowing: guaranteed non-None by guard above. branch_name: str = name # type: ignore[assignment] - if err := self._check_ref(branch_name, param="name"): + if err := self._check_git_arg(branch_name, param="name"): return err if action == "create": @@ -687,7 +687,7 @@ async def execute( schemes = ", ".join(_ALLOWED_CLONE_SCHEMES) return ToolExecutionResult( content=( - f"Invalid clone URL. Only {schemes}" + f"Invalid clone URL. Only {schemes} " "and SCP-like (user@host:path) URLs are " "allowed" ), @@ -697,7 +697,7 @@ async def execute( args = ["clone"] if branch := arguments.get("branch"): - if err := self._check_ref(branch, param="branch"): + if err := self._check_git_arg(branch, param="branch"): return err args.extend(["--branch", branch]) diff --git a/src/ai_company/tools/sandbox/config.py b/src/ai_company/tools/sandbox/config.py index c592be3c9c..d509f81ae2 100644 --- a/src/ai_company/tools/sandbox/config.py +++ b/src/ai_company/tools/sandbox/config.py @@ -1,6 +1,8 @@ """Subprocess sandbox configuration model.""" -from pydantic import BaseModel, ConfigDict, Field +from pathlib import PurePath + +from pydantic import BaseModel, ConfigDict, Field, field_validator class SubprocessSandboxConfig(BaseModel): @@ -17,6 +19,8 @@ class SubprocessSandboxConfig(BaseModel): env_denylist_patterns: fnmatch patterns to strip even if in the allowlist (e.g. ``*KEY*`` catches ``API_KEY``). Includes secret-name heuristics and library injection vars. + extra_safe_path_prefixes: Additional non-empty absolute PATH + prefixes appended to platform defaults for the PATH filter. """ model_config = ConfigDict(frozen=True) @@ -58,3 +62,22 @@ class SubprocessSandboxConfig(BaseModel): "RUBYLIB", "PERL5LIB", ) + extra_safe_path_prefixes: tuple[str, ...] = () + """Additional safe PATH prefixes appended to platform defaults. + + Use this to allow tool-specific directories (e.g. a custom Git + install location) through the PATH filter without modifying the + built-in platform defaults. + """ + + @field_validator("extra_safe_path_prefixes") + @classmethod + def _validate_prefixes(cls, v: tuple[str, ...]) -> tuple[str, ...]: + for prefix in v: + if not prefix or not PurePath(prefix).is_absolute(): + msg = ( + "extra_safe_path_prefixes entries must be " + f"non-empty absolute paths, got: {prefix!r}" + ) + raise ValueError(msg) + return v diff --git a/src/ai_company/tools/sandbox/subprocess_sandbox.py b/src/ai_company/tools/sandbox/subprocess_sandbox.py index 6e67a2bab2..24799daeda 100644 --- a/src/ai_company/tools/sandbox/subprocess_sandbox.py +++ b/src/ai_company/tools/sandbox/subprocess_sandbox.py @@ -24,10 +24,12 @@ SANDBOX_EXECUTE_TIMEOUT, SANDBOX_HEALTH_CHECK, SANDBOX_KILL_FAILED, + SANDBOX_KILL_FALLBACK, SANDBOX_PATH_FALLBACK, SANDBOX_SPAWN_FAILED, SANDBOX_WORKSPACE_VIOLATION, ) +from ai_company.tools._process_cleanup import close_subprocess_transport from ai_company.tools.sandbox.config import SubprocessSandboxConfig from ai_company.tools.sandbox.errors import ( SandboxError, @@ -188,18 +190,24 @@ def _is_safe_path_entry( return True return False - @staticmethod - def _get_safe_path_prefixes() -> tuple[str, ...]: - """Return safe PATH prefixes for the current platform.""" + def _get_safe_path_prefixes(self) -> tuple[str, ...]: + """Return safe PATH prefixes for the current platform. + + Combines built-in platform defaults with any extra prefixes + from ``SubprocessSandboxConfig.extra_safe_path_prefixes``. + """ + defaults: tuple[str, ...] if os.name == "nt": system_root = os.environ.get("SYSTEMROOT", r"C:\WINDOWS") - return ( + defaults = ( system_root, str(Path(system_root) / "system32"), r"C:\Program Files\Git", r"C:\Program Files (x86)\Git", ) - return ("/usr/bin", "/usr/local/bin", "/bin", "/usr/sbin", "/sbin") + else: + defaults = ("/usr/bin", "/usr/local/bin", "/bin", "/usr/sbin", "/sbin") + return defaults + self._config.extra_safe_path_prefixes def _build_filtered_env( self, @@ -280,10 +288,15 @@ def _kill_process(proc: asyncio.subprocess.Process) -> None: """ if _HAS_PROCESS_GROUPS: try: - os.killpg(os.getpgid(proc.pid), signal.SIGKILL) # type: ignore[attr-defined] + os.killpg(os.getpgid(proc.pid), signal.SIGKILL) # type: ignore[attr-defined,unused-ignore] except ProcessLookupError: return - except OSError: + except OSError as kill_exc: + logger.warning( + SANDBOX_KILL_FALLBACK, + pid=proc.pid, + error=str(kill_exc), + ) with contextlib.suppress(ProcessLookupError): proc.kill() return @@ -292,6 +305,16 @@ def _kill_process(proc: asyncio.subprocess.Process) -> None: with contextlib.suppress(ProcessLookupError): proc.kill() + @staticmethod + def _close_process(proc: asyncio.subprocess.Process) -> None: + """Close subprocess transport to prevent ResourceWarning on Windows. + + Delegates to :func:`close_subprocess_transport` — see its + docstring for details on the CPython-internal ``_transport`` + access and error handling. + """ + close_subprocess_transport(proc) + async def _spawn_process( self, command: str, @@ -399,7 +422,7 @@ async def _drain_after_kill( pid=proc.pid, error="process did not terminate 5s after kill", ) - return b"", b"" + return b"", b"[sandbox] process did not terminate after kill" async def execute( self, @@ -443,12 +466,19 @@ async def execute( ) proc = await self._spawn_process(command, args, work_dir, env) - stdout_bytes, stderr_bytes, timed_out = await self._communicate_with_timeout( - proc, - command, - args, - effective_timeout, - ) + try: + ( + stdout_bytes, + stderr_bytes, + timed_out, + ) = await self._communicate_with_timeout( + proc, + command, + args, + effective_timeout, + ) + finally: + self._close_process(proc) stdout = stdout_bytes.decode("utf-8", errors="replace").strip() stderr = stderr_bytes.decode("utf-8", errors="replace").strip() diff --git a/tests/unit/engine/test_agent_engine_errors.py b/tests/unit/engine/test_agent_engine_errors.py index 2f3c9e1ed0..b288128134 100644 --- a/tests/unit/engine/test_agent_engine_errors.py +++ b/tests/unit/engine/test_agent_engine_errors.py @@ -261,7 +261,8 @@ async def test_handle_fatal_error_secondary_failure_raises_original( identity=sample_agent_with_personality, task=sample_task_with_criteria, ) - assert exc_info.value.__cause__ is None + assert isinstance(exc_info.value.__cause__, ValueError) + assert "secondary failure" in str(exc_info.value.__cause__) @pytest.mark.unit diff --git a/tests/unit/engine/test_shutdown.py b/tests/unit/engine/test_shutdown.py index cd8a4112fb..309171af97 100644 --- a/tests/unit/engine/test_shutdown.py +++ b/tests/unit/engine/test_shutdown.py @@ -412,3 +412,93 @@ def test_zero_grace_seconds_rejected(self) -> None: def test_blank_strategy_rejected(self) -> None: with pytest.raises(ValidationError): GracefulShutdownConfig(strategy=" ") + + +# ── _log_post_cancel_exceptions ─────────────────────────────────── + + +@pytest.mark.unit +class TestLogPostCancelExceptions: + """Extracted helper retrieves exceptions without swallowing them.""" + + def test_skips_cancelled_tasks(self) -> None: + strategy = CooperativeTimeoutStrategy() + task = MagicMock(spec=asyncio.Task) + task.cancelled.return_value = True + # Should not call task.exception() + strategy._log_post_cancel_exceptions({task}) + task.exception.assert_not_called() + + def test_logs_task_exception(self) -> None: + strategy = CooperativeTimeoutStrategy() + task = MagicMock(spec=asyncio.Task) + task.cancelled.return_value = False + task.exception.return_value = RuntimeError("boom") + task.get_name.return_value = "test-task" + # Should not raise + strategy._log_post_cancel_exceptions({task}) + task.exception.assert_called_once() + + def test_handles_no_exception(self) -> None: + strategy = CooperativeTimeoutStrategy() + task = MagicMock(spec=asyncio.Task) + task.cancelled.return_value = False + task.exception.return_value = None + task.get_name.return_value = "test-task" + strategy._log_post_cancel_exceptions({task}) + task.exception.assert_called_once() + + def test_handles_invalid_state_error(self) -> None: + strategy = CooperativeTimeoutStrategy() + task = MagicMock(spec=asyncio.Task) + task.cancelled.return_value = False + task.exception.side_effect = asyncio.InvalidStateError + task.get_name.return_value = "test-task" + # Should not raise — logs at DEBUG instead of silent pass + strategy._log_post_cancel_exceptions({task}) + + +# ── Signal handler recovery ────────────────────────────────────── + + +@pytest.mark.unit +class TestSignalHandlerRecovery: + """Signal handler falls back to loop.stop() on strategy failure.""" + + def test_handle_signal_unix_falls_back_to_loop_stop(self) -> None: + strategy = MagicMock(spec=ShutdownStrategy) + strategy.request_shutdown.side_effect = RuntimeError("broken") + manager = ShutdownManager(strategy=strategy) + mock_loop = MagicMock() + with patch("asyncio.get_running_loop", return_value=mock_loop): + manager._handle_signal(signal.SIGINT) + mock_loop.stop.assert_called_once() + + def test_handle_signal_threadsafe_no_loop_stderr_fallback(self) -> None: + strategy = MagicMock(spec=ShutdownStrategy) + strategy.request_shutdown.side_effect = RuntimeError("broken") + manager = ShutdownManager(strategy=strategy) + with ( + patch( + "asyncio.get_running_loop", + side_effect=RuntimeError("no loop"), + ), + patch("sys.stderr") as mock_stderr, + ): + manager._handle_signal_threadsafe(signal.SIGINT.value, None) + mock_stderr.write.assert_called_once() + + def test_handle_signal_threadsafe_no_loop_stderr_also_fails(self) -> None: + strategy = MagicMock(spec=ShutdownStrategy) + strategy.request_shutdown.side_effect = RuntimeError("broken") + manager = ShutdownManager(strategy=strategy) + with ( + patch( + "asyncio.get_running_loop", + side_effect=RuntimeError("no loop"), + ), + patch("sys.stderr") as mock_stderr, + ): + mock_stderr.write.side_effect = OSError("stderr closed") + # Should not raise even when stderr fails + manager._handle_signal_threadsafe(signal.SIGINT.value, None) diff --git a/tests/unit/tools/file_system/test_list_directory.py b/tests/unit/tools/file_system/test_list_directory.py index 4092b0f769..21b8fe1d85 100644 --- a/tests/unit/tools/file_system/test_list_directory.py +++ b/tests/unit/tools/file_system/test_list_directory.py @@ -116,7 +116,7 @@ async def test_large_directory_truncation(self, workspace: Path) -> None: assert not result.is_error assert "Truncated" in result.content assert result.metadata is not None - assert result.metadata["total_entries"] > MAX_ENTRIES + assert result.metadata["total_entries"] == MAX_ENTRIES assert result.metadata["truncated"] is True async def test_unsafe_glob_pattern_rejected( diff --git a/tests/unit/tools/git/test_git_tools.py b/tests/unit/tools/git/test_git_tools.py index 228ac9348c..944350b91c 100644 --- a/tests/unit/tools/git/test_git_tools.py +++ b/tests/unit/tools/git/test_git_tools.py @@ -726,6 +726,95 @@ async def slow_communicate() -> tuple[bytes, bytes]: assert "timed out" in result.content mock_proc.kill.assert_called_once() + async def test_timeout_includes_stderr_fragment( + self, + status_tool: GitStatusTool, + ) -> None: + """Timeout message includes stderr when available.""" + calls = 0 + + async def slow_then_stderr() -> tuple[bytes, bytes]: + nonlocal calls + calls += 1 + if calls == 1: + await asyncio.sleep(999) + return b"", b"fatal: could not access repository" + + mock_proc = MagicMock() + mock_proc.communicate = slow_then_stderr + mock_proc.kill = MagicMock() + mock_proc._transport = None # no transport to close + + with patch( + "asyncio.create_subprocess_exec", + return_value=mock_proc, + ): + result = await status_tool._run_git(["status"], deadline=0.01) + + assert result.is_error + assert "timed out" in result.content + assert "fatal: could not access repository" in result.content + + async def test_timeout_sanitizes_stderr_control_chars( + self, + status_tool: GitStatusTool, + ) -> None: + """Control characters in stderr are stripped.""" + calls = 0 + + async def slow_then_dirty_stderr() -> tuple[bytes, bytes]: + nonlocal calls + calls += 1 + if calls == 1: + await asyncio.sleep(999) + return b"", b"error\x00with\x07control\x1fchars" + + mock_proc = MagicMock() + mock_proc.communicate = slow_then_dirty_stderr + mock_proc.kill = MagicMock() + mock_proc._transport = None + + with patch( + "asyncio.create_subprocess_exec", + return_value=mock_proc, + ): + result = await status_tool._run_git(["status"], deadline=0.01) + + assert result.is_error + assert "\x00" not in result.content + assert "\x07" not in result.content + assert "error with control chars" in result.content + + async def test_timeout_sanitizes_stderr_truncation( + self, + status_tool: GitStatusTool, + ) -> None: + """Stderr fragment is truncated to 500 characters.""" + calls = 0 + + async def slow_then_long_stderr() -> tuple[bytes, bytes]: + nonlocal calls + calls += 1 + if calls == 1: + await asyncio.sleep(999) + return b"", ("x" * 1000).encode() + + mock_proc = MagicMock() + mock_proc.communicate = slow_then_long_stderr + mock_proc.kill = MagicMock() + mock_proc._transport = None + + with patch( + "asyncio.create_subprocess_exec", + return_value=mock_proc, + ): + result = await status_tool._run_git(["status"], deadline=0.01) + + assert result.is_error + # 500 chars from stderr + prefix "Git command timed out after 0.01s: " + stderr_part = result.content.split(": ", 1)[1] + assert len(stderr_part) == 500 + async def test_oserror_returns_error(self, status_tool: GitStatusTool) -> None: """OSError when git binary not found returns error result.""" with patch( diff --git a/tests/unit/tools/sandbox/test_subprocess_sandbox.py b/tests/unit/tools/sandbox/test_subprocess_sandbox.py index 70f4c49a02..57bd200864 100644 --- a/tests/unit/tools/sandbox/test_subprocess_sandbox.py +++ b/tests/unit/tools/sandbox/test_subprocess_sandbox.py @@ -5,6 +5,7 @@ from unittest.mock import patch import pytest +from pydantic import ValidationError from ai_company.tools.sandbox.config import SubprocessSandboxConfig from ai_company.tools.sandbox.errors import SandboxError, SandboxStartError @@ -439,3 +440,83 @@ def test_backend_type( subprocess_sandbox: SubprocessSandbox, ) -> None: assert subprocess_sandbox.get_backend_type() == "subprocess" + + +# ── extra_safe_path_prefixes ───────────────────────────────────── + + +class TestExtraSafePathPrefixes: + """Tests for configurable safe PATH prefixes.""" + + def test_extra_prefixes_included( + self, + sandbox_workspace: Path, + ) -> None: + extra = (r"C:\CustomTools",) if os.name == "nt" else ("/opt/custom/bin",) + config = SubprocessSandboxConfig( + extra_safe_path_prefixes=extra, + ) + sandbox = SubprocessSandbox( + config=config, + workspace=sandbox_workspace, + ) + prefixes = sandbox._get_safe_path_prefixes() + assert extra[0] in prefixes + + def test_extra_prefix_survives_path_filter( + self, + sandbox_workspace: Path, + ) -> None: + extra, fake_path = ( + ( + (r"C:\CustomTools",), + r"C:\CustomTools\bin;C:\suspicious\dir", + ) + if os.name == "nt" + else ( + ("/opt/custom",), + "/opt/custom/bin:/suspicious/dir", + ) + ) + config = SubprocessSandboxConfig( + restricted_path=True, + extra_safe_path_prefixes=extra, + ) + sandbox = SubprocessSandbox( + config=config, + workspace=sandbox_workspace, + ) + with patch.dict( + os.environ, + {"PATH": fake_path}, + clear=True, + ): + env = sandbox._build_filtered_env() + assert "suspicious" not in env.get("PATH", "").lower() + assert "custom" in env.get("PATH", "").lower() + + def test_default_empty_no_change( + self, + sandbox_workspace: Path, + ) -> None: + config = SubprocessSandboxConfig() + sandbox = SubprocessSandbox( + config=config, + workspace=sandbox_workspace, + ) + defaults_only = SubprocessSandbox( + workspace=sandbox_workspace, + ) + assert ( + sandbox._get_safe_path_prefixes() == defaults_only._get_safe_path_prefixes() + ) + + def test_rejects_empty_string(self) -> None: + with pytest.raises(ValidationError, match="non-empty absolute"): + SubprocessSandboxConfig(extra_safe_path_prefixes=("",)) + + def test_rejects_relative_path(self) -> None: + with pytest.raises(ValidationError, match="non-empty absolute"): + SubprocessSandboxConfig( + extra_safe_path_prefixes=("relative/path",), + ) diff --git a/tests/unit/tools/test_process_cleanup.py b/tests/unit/tools/test_process_cleanup.py new file mode 100644 index 0000000000..d89071e02f --- /dev/null +++ b/tests/unit/tools/test_process_cleanup.py @@ -0,0 +1,70 @@ +"""Tests for the subprocess transport cleanup utility.""" + +from unittest.mock import MagicMock, PropertyMock + +import pytest + +from ai_company.tools._process_cleanup import close_subprocess_transport + +pytestmark = [pytest.mark.unit, pytest.mark.timeout(30)] + + +class TestCloseSubprocessTransport: + """close_subprocess_transport handles all transport states safely.""" + + def test_noop_when_transport_is_none(self) -> None: + """No-op when _transport attribute is absent.""" + proc = MagicMock() + proc._transport = None + close_subprocess_transport(proc) + # Should not raise + + def test_noop_when_transport_attr_missing(self) -> None: + """No-op when _transport attribute does not exist.""" + proc = MagicMock(spec=[]) # empty spec — no _transport + close_subprocess_transport(proc) + + def test_noop_when_transport_is_closing(self) -> None: + """No-op when transport is already closing.""" + proc = MagicMock() + transport = MagicMock() + transport.is_closing.return_value = True + proc._transport = transport + close_subprocess_transport(proc) + transport.close.assert_not_called() + + def test_closes_open_transport(self) -> None: + """Closes transport when it is open.""" + proc = MagicMock() + transport = MagicMock() + transport.is_closing.return_value = False + proc._transport = transport + close_subprocess_transport(proc) + transport.close.assert_called_once() + + def test_suppresses_close_exception(self) -> None: + """Exception from transport.close() is logged and suppressed.""" + proc = MagicMock() + transport = MagicMock() + transport.is_closing.return_value = False + transport.close.side_effect = OSError("pipe broken") + proc._transport = transport + # Should not raise + close_subprocess_transport(proc) + + def test_suppresses_is_closing_exception(self) -> None: + """Exception from transport.is_closing() is logged and suppressed.""" + proc = MagicMock() + transport = MagicMock() + transport.is_closing.side_effect = AttributeError("no method") + proc._transport = transport + # Should not raise — is_closing() is now inside the try/except + close_subprocess_transport(proc) + + def test_suppresses_is_closing_on_non_transport(self) -> None: + """When _transport exists but is not a real transport, no crash.""" + proc = MagicMock() + # _transport is a string (not a transport object) + type(proc)._transport = PropertyMock(return_value="not-a-transport") + # Should not raise + close_subprocess_transport(proc)