diff --git a/.claude/settings.json b/.claude/settings.json index 0ab6b723bb..39acc8078c 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -29,6 +29,11 @@ "command": "bash scripts/check_no_atlas_rehash.sh", "timeout": 5000 }, + { + "type": "command", + "command": "bash scripts/check_no_baseline_update.sh", + "timeout": 5000 + }, { "type": "command", "command": "bash scripts/check_bash_no_write.sh", @@ -69,6 +74,11 @@ "command": "bash scripts/check_no_edit_baseline.sh", "timeout": 5000 }, + { + "type": "command", + "command": "bash scripts/check_no_em_dashes_hook.sh", + "timeout": 5000 + }, { "type": "command", "command": "bash scripts/check_pre_pr_review_triage_gate.sh", diff --git a/.opencode/plugins/synthorg-hooks.ts b/.opencode/plugins/synthorg-hooks.ts index 4a6d1946f7..aa8140a885 100644 --- a/.opencode/plugins/synthorg-hooks.ts +++ b/.opencode/plugins/synthorg-hooks.ts @@ -7,11 +7,13 @@ * Committed Claude Code hooks (from .claude/settings.json): * PreToolUse (Bash): scripts/check_push_rebased.sh * PreToolUse (Bash): scripts/check_no_atlas_rehash.sh + * PreToolUse (Bash): scripts/check_no_baseline_update.sh * PreToolUse (Bash): scripts/check_bash_no_write.sh * PreToolUse (Bash): scripts/check_git_c_cwd.sh * PreToolUse (Bash | Edit): scripts/check_no_bulk_edit.py * PreToolUse (Edit|Write): scripts/check_no_edit_migration.sh * PreToolUse (Edit|Write): scripts/check_no_edit_baseline.sh + * PreToolUse (Edit|Write): scripts/check_no_em_dashes_hook.sh * PreToolUse (Edit|Write): scripts/check_pre_pr_review_triage_gate.sh * PostToolUse (Edit|Write): scripts/check_web_design_system.py * PostToolUse (Edit|Write): scripts/check_backend_regional_defaults.py @@ -193,16 +195,53 @@ export const SynthOrgHooks: Plugin = async ({ client, $, app }) => { } } - const payload = { tool_input: { file_path: filePath } } as Record; + const filePathInput = { file_path: filePath } as Record; + // Order must match `.claude/settings.json` PreToolUse Edit|Write: + // migration, baseline, em-dash (richer payload), triage-gate. + // The em-dash hook runs between baseline and triage-gate to enforce + // content-correctness before the workflow-lock fires. for (const script of [ "scripts/check_no_edit_migration.sh", "scripts/check_no_edit_baseline.sh", - "scripts/check_pre_pr_review_triage_gate.sh", ]) { const outcome = runHookScript( script, - payload.tool_input as Record, + filePathInput, + 5000, + ); + const denyReason = denyReasonFromOutcome(outcome); + if (denyReason) { + throw new Error(denyReason); + } + } + + // check_no_em_dashes_hook.sh: inspects the candidate content + // before it lands on disk (mirrors scripts/check_no_em_dashes.py). + const args = (output.args ?? {}) as Record; + const emDashInput = { ...filePathInput } as Record; + if (typeof args.content === "string") { + emDashInput.content = args.content; + } + if (typeof args.new_string === "string") { + emDashInput.new_string = args.new_string; + } + { + const outcome = runHookScript( + "scripts/check_no_em_dashes_hook.sh", + emDashInput, + 5000, + ); + const denyReason = denyReasonFromOutcome(outcome); + if (denyReason) { + throw new Error(denyReason); + } + } + + { + const outcome = runHookScript( + "scripts/check_pre_pr_review_triage_gate.sh", + filePathInput, 5000, ); const denyReason = denyReasonFromOutcome(outcome); @@ -300,6 +339,22 @@ export const SynthOrgHooks: Plugin = async ({ client, $, app }) => { } } + // check_no_baseline_update.sh: block --update-baseline / + // --refresh-baseline invocations on gate scripts. Like the atlas + // hook above we invoke unconditionally because aliases / + // subprocess wrappers could hide the literal flag tokens. + { + const outcome = runHookScript( + "scripts/check_no_baseline_update.sh", + { command }, + 5000, + ); + const denyReason = denyReasonFromOutcome(outcome); + if (denyReason) { + throw new Error(denyReason); + } + } + // check_bash_no_write.sh: block file writes via Bash const bashWriteOutcome = runHookScript( "scripts/check_bash_no_write.sh", diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0501085d99..fd5ed3c3a1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -215,6 +215,13 @@ repos: language: system files: ^tests/.*\.py$ + - id: baseline-growth + name: block growth of gate suppression baselines + entry: uv + args: [run, python, scripts/check_baseline_growth.py] + language: system + files: ^scripts/(([a-z_][a-z_]*_baseline\.(txt|json))|(_[a-z][a-z_]*_baseline\.py))$ + - id: no-release-please-token name: forbid new RELEASE_PLEASE_TOKEN references entry: uv diff --git a/CLAUDE.md b/CLAUDE.md index 4ff6b6403d..6c214bc599 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -15,7 +15,7 @@ Web: see `web/CLAUDE.md`. CLI: see `cli/CLAUDE.md` (use `go -C cli`, never `cd c - **Configuration Precedence (MANDATORY)**: DB > env > YAML > code default via `SettingsService`/`ConfigResolver`; no `os.environ.get` outside startup. See [docs/reference/configuration-precedence.md](docs/reference/configuration-precedence.md). - **No Hardcoded Values (MANDATORY)**: numerics live in `settings/definitions/`; allowlist 0/1/-1, HTTP codes, hex masks, powers-of-2. Enforced by `scripts/check_no_magic_numbers.py`. - **Doc Numeric Claims (MANDATORY)**: numerics in README + public docs sourced from `data/runtime_stats.yaml` via `` markers. See `data/README.md`. -- **Test Regression (MANDATORY)**: timeout/slow failures = source-code regression; never edit `tests/baselines/unit_timing.json`. +- **Test Regression (MANDATORY)**: timeout/slow failures = source-code regression; never edit `tests/baselines/unit_timing.json` or any `scripts/*_baseline.{txt,json}` / `scripts/_*_baseline.py`. Both families are PreToolUse-blocked. Per-invocation bypass for gate baselines: `ALLOW_BASELINE_GROWTH=1 git commit ...` (requires explicit user approval). - **Post-Implementation + Pre-PR Review (MANDATORY)**: after issue: branch + commit + push (no auto-PR); use `/pre-pr-review` (gh pr create is hookify-blocked). After PR: `/aurelio-review-pr` for external feedback. Fix EVERYTHING valid; no deferring. ## Quick Commands diff --git a/data/runtime_stats.yaml b/data/runtime_stats.yaml index cfc5c14e27..9cc22343dd 100644 --- a/data/runtime_stats.yaml +++ b/data/runtime_stats.yaml @@ -43,8 +43,8 @@ stats: raw: 7 display: "7" convention_gates: - raw: 37 - display: "37" + raw: 38 + display: "38" sources: tests: "uv run python -m pytest --collect-only -q" diff --git a/docs/reference/convention-gates.md b/docs/reference/convention-gates.md index 1c33058f2b..99a2358816 100644 --- a/docs/reference/convention-gates.md +++ b/docs/reference/convention-gates.md @@ -11,6 +11,7 @@ The gate's job is to catch the SECOND occurrence of the category; the audit's jo All under `scripts/`. The list is generated by `ls scripts/check_*.py`; if an entry below disappears or a new check script lands, update this page in the same PR (and the meta-gate enforces this for the canonical doc set). - `check_backend_regional_defaults.py` +- `check_baseline_growth.py` - `check_boundary_typed.py` - `check_currency_aggregation_invariant.py` - `check_dead_api_endpoints.py` @@ -47,7 +48,20 @@ All under `scripts/`. The list is generated by `ls scripts/check_*.py`; if an en - `check_workflow_shell_git_commits.py` - `check_workflow_tag_lifecycle.py` -(37 total `check_*.py` scripts: enforcement gates plus the meta-gate below.) +(38 total `check_*.py` scripts: enforcement gates plus the meta-gate below.) + +## PreToolUse hooks (Claude Code + OpenCode) + +Some conventions are also enforced *before* the file lands on disk so the offending content never reaches the diff. Bash scripts under `scripts/` registered in `.claude/settings.json` and `.opencode/plugins/synthorg-hooks.ts`: + +- `check_no_edit_baseline.sh`: blocks `Edit` / `Write` on `tests/baselines/*.json`, `scripts/*_baseline.{txt,json}`, and `scripts/_*_baseline.py`. +- `check_no_baseline_update.sh`: blocks `Bash` invocations of `scripts/check_*.py --update-baseline` / `--update` / `--refresh-baseline`. +- `check_no_em_dashes_hook.sh`: blocks `Edit` / `Write` whose candidate content contains a U+2014 em-dash or one of its HTML entities. Mirrors the diff-time `check_no_em_dashes.py` pre-commit gate. +- `check_no_edit_migration.sh`: blocks `Edit` / `Write` on `src/synthorg/persistence/{sqlite,postgres}/revisions/*.sql` (use `atlas migrate diff` instead). +- `check_no_atlas_rehash.sh`: blocks `Bash` invocations of `atlas migrate hash` (rehashing breaks installed databases). +- `check_pre_pr_review_triage_gate.sh`: blocks `Edit` / `Write` outside `_audit/` while a `/pre-pr-review` triage table is pending user approval. + +The hook layer is fail-closed: the OpenCode plugin treats hook execution errors as denials, so a misbehaving hook script blocks the action rather than letting it through. ## Registration procedure diff --git a/scripts/check_baseline_growth.py b/scripts/check_baseline_growth.py new file mode 100644 index 0000000000..099be05d26 --- /dev/null +++ b/scripts/check_baseline_growth.py @@ -0,0 +1,244 @@ +"""Block commits that grow gate-suppression baselines. + +Gate suppression baselines (``scripts/*_baseline.txt``, +``scripts/*_baseline.json``, ``scripts/_*_baseline.py``) record pre-existing +violations that the corresponding ``check_*`` gate is allowed to skip. The +baselines are intended to shrink monotonically -- a PR that makes the codebase +worse by silencing a new violation should fail the gate, not amend the +baseline. + +This pre-commit hook diffs each baseline file against ``HEAD`` and rejects +the commit if the staged version contains *more* entries than the committed +version. Shrinking and rewording are allowed; growing is not. + +Bypass (rare; requires explicit user approval): ``SKIP=baseline-growth git +commit ...`` or set ``ALLOW_BASELINE_GROWTH=1`` in the environment. +""" + +import json +import os +import re +import subprocess +import sys + +EXIT_OK = 0 +EXIT_GROWTH_DETECTED = 1 +EXIT_INVALID_BASELINE = 2 + +_SCRIPTS_DIRNAME = "scripts" + +# Single regex governing every "is this a gate-suppression baseline" decision. +# Three legitimate shapes: +# - ``_baseline.txt`` / ``_baseline.json`` (optional leading ``_``) +# - ``__baseline.py`` (leading ``_`` REQUIRED for py-format baselines) +# Everything else is rejected so ``_is_baseline_path`` and any downstream check +# stay consistent: a previous split between an ``endswith()`` filter and a +# regex sanitiser let some shapes pass one check and not the other. +_BASELINE_BASENAME_RE = re.compile( + r"^(?:_?[a-z][a-z_]*_baseline\.(?:txt|json)|_[a-z][a-z_]*_baseline\.py)$" +) + + +class InvalidBaselineError(Exception): + """Raised when a staged baseline file cannot be parsed.""" + + +def _is_baseline_path(path: str) -> bool: + """Return ``True`` for paths this gate should compare against HEAD. + + Validates against ``_BASELINE_BASENAME_RE`` so every code path agrees on + which shapes count as a baseline. Rejects nested paths and any basename + containing path separators (POSIX or Windows-style). + """ + if not path.startswith(f"{_SCRIPTS_DIRNAME}/"): + return False + basename = path[len(_SCRIPTS_DIRNAME) + 1 :] + if "/" in basename or "\\" in basename: + return False + return _BASELINE_BASENAME_RE.fullmatch(basename) is not None + + +def _count_json_entries(text: str) -> int: + """Count entries in a JSON baseline; raise ``InvalidBaselineError`` on parse failure. + + A corrupt baseline must block the commit, never silently pass. The previous + sentinel return of -1 was less-than every non-negative ``head_count``, which + let malformed baselines slip through the ``staged > head`` comparison. + + For dict payloads, prefer the ``locations`` key (the canonical shape used + by gate baselines). When ``locations`` is missing or non-collection, fall + back to counting top-level keys so a flat-dict baseline format still + surfaces growth instead of silently returning 0. + """ + try: + payload = json.loads(text) + except json.JSONDecodeError as exc: + msg = f"baseline JSON failed to parse: {exc.msg} at line {exc.lineno}" + raise InvalidBaselineError(msg) from exc + if isinstance(payload, dict): + locations = payload.get("locations") + if isinstance(locations, dict): + return len(locations) + if isinstance(locations, list): + return len(locations) + return len(payload) + if isinstance(payload, list): + return len(payload) + return 0 + + +def _count_text_entries(text: str) -> int: + """Count non-blank, non-comment lines in a text baseline.""" + return sum( + 1 + for line in text.splitlines() + if line.strip() and not line.lstrip().startswith("#") + ) + + +def _staged_entries(text: str, suffix: str) -> int: + """Count the entries in a baseline file's content.""" + if suffix == ".json": + return _count_json_entries(text) + return _count_text_entries(text) + + +def _read_head(path: str) -> str | None: + """Return the file at ``HEAD`` or ``None`` if it did not exist there. + + ``FileNotFoundError`` from a missing ``git`` binary is treated the same as + "no HEAD content", matching the not-yet-committed-baseline path. Any other + OS error is surfaced via stderr so a broken environment is visible rather + than silently letting growth through. + """ + try: + result = subprocess.run( + ["git", "show", f"HEAD:{path}"], + check=False, + capture_output=True, + text=True, + ) + except FileNotFoundError: + return None + except OSError as exc: + print(f"WARNING: git show failed for {path}: {exc}", file=sys.stderr) + return None + if result.returncode != 0: + return None + return result.stdout + + +def _classify(path: str) -> str: + """Return the file-suffix tag (``.json`` / ``.py`` / ``.txt``) for a baseline path.""" + if path.endswith(".json"): + return ".json" + if path.endswith(".py"): + return ".py" + return ".txt" + + +def _read_staged(path: str) -> str | None: + """Return the staged-blob content for ``path`` from the git index, or ``None``. + + Reads via ``git show :`` so the gate compares what is actually + staged for the commit, not whatever happens to be on disk. A working-tree + read could drift from the index (post-stage edit, file removed after + staging) and let growth slip through. Reading from the index also avoids + any filesystem touch on the user-controlled argv path, closing the + path-injection class entirely. + + Treats a missing ``git`` binary, the path not being staged, and other OS + errors as "no staged content" so the caller skips silently rather than + crashing on infrastructure problems pre-commit cannot reach in practice. + """ + try: + result = subprocess.run( + ["git", "show", f":{path}"], + check=False, + capture_output=True, + text=True, + ) + except FileNotFoundError: + return None + except OSError as exc: + print(f"WARNING: git show :{path} failed: {exc}", file=sys.stderr) + return None + if result.returncode != 0: + return None + return result.stdout + + +def _inspect_path( + path: str, + grown: list[tuple[str, int, int]], + invalid: list[tuple[str, str]], +) -> None: + """Compare one staged baseline against HEAD; record growth or parse failure.""" + suffix = _classify(path) + staged_text = _read_staged(path) + if staged_text is None: + return + try: + staged_count = _staged_entries(staged_text, suffix) + except InvalidBaselineError as exc: + invalid.append((path, str(exc))) + return + head_text = _read_head(path) + head_count = 0 + if head_text is not None: + try: + head_count = _staged_entries(head_text, suffix) + except InvalidBaselineError as exc: + print( + f"WARNING: HEAD baseline {path} failed to parse ({exc}); " + "falling back to head_count=0. The growth check may be " + "over-strict for this file until HEAD is repaired.", + file=sys.stderr, + ) + head_count = 0 + if staged_count > head_count: + grown.append((path, head_count, staged_count)) + + +def main(argv: list[str]) -> int: + """Compare staged baseline files against HEAD; reject growth.""" + if os.environ.get("ALLOW_BASELINE_GROWTH") == "1": + return EXIT_OK + paths = [p for p in argv[1:] if _is_baseline_path(p)] + if not paths: + return EXIT_OK + grown: list[tuple[str, int, int]] = [] + invalid: list[tuple[str, str]] = [] + for path in paths: + _inspect_path(path, grown, invalid) + if invalid: + print( + "Gate suppression baselines failed to parse:", + file=sys.stderr, + ) + for path, reason in invalid: + print(f" {path}: {reason}", file=sys.stderr) + return EXIT_INVALID_BASELINE + if not grown: + return EXIT_OK + print( + "Gate suppression baselines must shrink monotonically. " + "These files have more entries than HEAD:", + file=sys.stderr, + ) + for path, head_count, staged_count in grown: + print( + f" {path}: {head_count} -> {staged_count} (+{staged_count - head_count})", + file=sys.stderr, + ) + print( + "\nFix the source instead, or ask the user before adding a new exception.\n" + "Bypass (requires explicit user approval): " + "ALLOW_BASELINE_GROWTH=1 git commit ...", + file=sys.stderr, + ) + return EXIT_GROWTH_DETECTED + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/scripts/check_no_baseline_update.sh b/scripts/check_no_baseline_update.sh new file mode 100755 index 0000000000..47447c7834 --- /dev/null +++ b/scripts/check_no_baseline_update.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# PreToolUse hook: block gate-baseline regeneration commands. +# +# `python scripts/check_.py --update-baseline` (and the older `--update` +# flag on a few gates) silently rewrites a baseline file to absorb new +# violations. This is the exact "grow gate-suppression debt" path we want +# to stop happening autonomously. +# +# Regeneration is rare and requires explicit user approval. If the user +# decides a regeneration is warranted, they can run the command themselves +# in a `! `-prefixed bash invocation, or temporarily disable this hook. +# +# Exit behaviour: +# - Non-update-baseline commands: exit 0 (allow) +# - --update-baseline / --update on a check_*.py: print JSON, exit 2 + +set -euo pipefail + +COMMAND=$(jq -r '.tool_input.command // ""' 2>/dev/null || true) +if [[ -z "$COMMAND" ]]; then + exit 0 +fi + +# Match patterns: +# scripts/check_.py --update-baseline +# scripts/check_.py --update +# python ... check_.py ... --update-baseline / --update +# uv run python ... check_.py ... --update-baseline / --update +# python scripts/check_.py --refresh-baseline +if [[ "$COMMAND" =~ scripts/check_[a-z_]+\.py.*(--update-baseline|--refresh-baseline) ]] || \ + [[ "$COMMAND" =~ scripts/check_[a-z_]+\.py.*--update($|[[:space:]]) ]]; then + REASON="Gate baseline regeneration is not autonomously allowed. Adding a new exception requires explicit user approval. Fix the source instead, or ask the user before running --update-baseline / --update / --refresh-baseline." + jq -nc \ + --arg reason "$REASON" \ + '{hookSpecificOutput: {hookEventName: "PreToolUse", permissionDecision: "deny", permissionDecisionReason: $reason}}' + exit 2 +fi + +exit 0 diff --git a/scripts/check_no_edit_baseline.sh b/scripts/check_no_edit_baseline.sh index 19f3535a54..d6b9581b1a 100644 --- a/scripts/check_no_edit_baseline.sh +++ b/scripts/check_no_edit_baseline.sh @@ -1,11 +1,14 @@ #!/usr/bin/env bash -# PreToolUse hook: block Edit/Write on test timing baselines. +# PreToolUse hook: block Edit/Write on protected baseline files. # Baseline files must only be updated with explicit user approval. -# Claude must never autonomously bump baselines to bypass regression guards. +# Claude must never autonomously bump baselines to bypass regression guards +# or grow gate-suppression debt. # # Protected files: -# - tests/baselines/unit_timing.json -# - tests/baselines/*.json (all baselines) +# - tests/baselines/*.json (test timing baselines) +# - scripts/*_baseline.txt (gate suppression baselines) +# - scripts/*_baseline.json (gate suppression baselines) +# - scripts/_*_baseline.py (gate suppression baselines, py-format) # # Exit behavior: # - Non-baseline files: exit 0 (allow) @@ -20,8 +23,22 @@ if [[ -z "$FILE_PATH" ]]; then exit 0 fi -if [[ "$FILE_PATH" == */tests/baselines/*.json || "$FILE_PATH" == tests/baselines/*.json ]]; then - REASON="Test timing baselines require explicit user approval to modify. Do not bump baselines to bypass regression guards -- fix the source code or tests instead." +# Normalise Windows-style backslashes to forward slashes so the case patterns +# below match `C:\repo\scripts\foo_baseline.txt` the same as the POSIX form. +NORMALISED_FILE_PATH=${FILE_PATH//\\//} + +REASON="" + +case "$NORMALISED_FILE_PATH" in + */tests/baselines/*.json|tests/baselines/*.json) + REASON="Test timing baselines require explicit user approval to modify. Do not bump baselines to bypass regression guards -- fix the source code or tests instead." + ;; + */scripts/*_baseline.txt|scripts/*_baseline.txt|*/scripts/*_baseline.json|scripts/*_baseline.json|*/scripts/_*_baseline.py|scripts/_*_baseline.py) + REASON="Gate suppression baselines require explicit user approval to modify. Do not grow them to silence new violations -- fix the source instead. If a legitimate exception exists, ask the user before regenerating with the gate's --update-baseline flag." + ;; +esac + +if [[ -n "$REASON" ]]; then cat </dev/null || true) +if [[ -z "$FILE_PATH" ]]; then + exit 0 +fi + +# Skip regenerated changelog (release-please rewrites this file from git history). +# Normalise Windows-style backslashes to forward slashes so the case patterns +# below match `C:\repo\.github\CHANGELOG.md` the same as the POSIX form. +NORMALISED_FILE_PATH=${FILE_PATH//\\//} +case "$NORMALISED_FILE_PATH" in + */.github/CHANGELOG.md|.github/CHANGELOG.md) + exit 0 + ;; +esac + +# Extract content: Write tool uses .tool_input.content, Edit tool uses .tool_input.new_string. +CONTENT=$(jq -r '.tool_input.content // .tool_input.new_string // ""' <<<"$INPUT" 2>/dev/null || true) +if [[ -z "$CONTENT" ]]; then + exit 0 +fi + +EM_DASH=$'\xe2\x80\x94' +AMP='&' +HTML_NAMED="${AMP}mdash;" +HTML_DEC="${AMP}#8212;" +HTML_HEX="${AMP}#x2014;" + +if grep -qF -e "$EM_DASH" -e "$HTML_NAMED" -e "$HTML_DEC" -e "$HTML_HEX" <<<"$CONTENT"; then + REASON="Em-dash (U+2014) detected in the content being written. Replace with the ASCII punctuation that matches the sentence: ':' (colon), ';' (semicolon), ',' (comma), '.' (period), '( ... )' (parens), or '-' (hyphen, compound only). Bare '--' is almost never right; prefer one of the above or rewrite." + jq -nc \ + --arg reason "$REASON" \ + '{hookSpecificOutput: {hookEventName: "PreToolUse", permissionDecision: "deny", permissionDecisionReason: $reason}}' + exit 2 +fi + +exit 0 diff --git a/tests/unit/scripts/test_check_baseline_growth.py b/tests/unit/scripts/test_check_baseline_growth.py new file mode 100644 index 0000000000..999605b1c7 --- /dev/null +++ b/tests/unit/scripts/test_check_baseline_growth.py @@ -0,0 +1,459 @@ +"""Unit tests for ``scripts/check_baseline_growth.py``. + +Loads the script as a module so its private helpers are callable +without spawning subprocesses. +""" + +import importlib.util +import json +import subprocess +from pathlib import Path +from types import ModuleType +from typing import Any, cast +from unittest.mock import patch + +import pytest + +pytestmark = pytest.mark.unit + +_REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent +_SCRIPT_PATH = _REPO_ROOT / "scripts" / "check_baseline_growth.py" + + +def _load_script_module() -> ModuleType: + spec = importlib.util.spec_from_file_location( + "_check_baseline_growth", + _SCRIPT_PATH, + ) + assert spec is not None + assert spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +_MODULE: Any = cast("Any", _load_script_module()) + + +# ── path classification ───────────────────────────────────────── + + +@pytest.mark.parametrize( + ("path", "expected"), + [ + # Accepted shapes + ("scripts/mock_spec_baseline.txt", True), + ("scripts/no_magic_numbers_baseline.txt", True), + ("scripts/_workflow_shell_git_commits_baseline.json", True), + ("scripts/legit_baseline.json", True), + ("scripts/_schema_drift_baseline.py", True), + # Wrong location + ("tests/baselines/unit_timing.json", False), + ("README.md", False), + # Wrong extension / shape + ("scripts/check_mock_spec.py", False), + ("scripts/check_no_edit_baseline.sh", False), + ("scripts/no_baseline_at_all.txt", False), + # py-format baseline must start with leading underscore + ("scripts/legit_baseline.py", False), + # Nested paths and traversal sequences must be rejected + ("scripts/subdir/mock_spec_baseline.txt", False), + ("scripts/../escape_baseline.txt", False), + (r"scripts/..\escape_baseline.txt", False), + # Disallowed character classes in basename + ("scripts/Capitalised_Baseline.txt", False), + ("scripts/legit-baseline.txt", False), + ("scripts/legit_baseline.exe", False), + # Anchored at scripts/ specifically + ("../escape_baseline.txt", False), + ("scriptsX/legit_baseline.txt", False), + ], +) +def test_is_baseline_path(path: str, expected: bool) -> None: + assert _MODULE._is_baseline_path(path) is expected + + +# ── entry counting ────────────────────────────────────────────── + + +def test_count_text_entries_skips_comments_and_blanks() -> None: + text = ( + "# header line\n" + "# another comment\n" + "\n" + "src/a.py:10:5\n" + "src/b.py:20:7\n" + " \n" + "src/c.py:30:9\n" + ) + assert _MODULE._count_text_entries(text) == 3 + + +def test_count_json_entries_locations_dict() -> None: + text = json.dumps( + {"description": "x", "locations": {"a": 1, "b": 2, "c": 3}}, + ) + assert _MODULE._count_json_entries(text) == 3 + + +def test_count_json_entries_locations_list() -> None: + text = json.dumps({"locations": ["a", "b"]}) + assert _MODULE._count_json_entries(text) == 2 + + +def test_count_json_entries_top_level_list() -> None: + text = json.dumps([1, 2, 3, 4]) + assert _MODULE._count_json_entries(text) == 4 + + +def test_count_json_entries_invalid_raises() -> None: + with pytest.raises(_MODULE.InvalidBaselineError): + _MODULE._count_json_entries("not json") + + +def test_count_json_entries_no_locations_dict_falls_back_to_top_level_keys() -> None: + """A flat-dict baseline (no ``locations`` key) falls back to top-level keys. + + The earlier sentinel of 0 created a loophole: any flat-dict format would + return 0 for both staged and HEAD, so growth (``staged > head``) was + never detected. Counting top-level keys instead lets the gate catch + additions even on unconventional baseline shapes. + """ + assert _MODULE._count_json_entries(json.dumps({"foo": "bar"})) == 1 + assert ( + _MODULE._count_json_entries(json.dumps({"a": 1, "b": 2, "c": 3, "d": 4})) == 4 + ) + + +# ── classification ────────────────────────────────────────────── + + +@pytest.mark.parametrize( + ("path", "expected"), + [ + ("scripts/foo_baseline.txt", ".txt"), + ("scripts/foo_baseline.json", ".json"), + ("scripts/_foo_baseline.py", ".py"), + ], +) +def test_classify(path: str, expected: str) -> None: + assert _MODULE._classify(path) == expected + + +# ── main: end-to-end behaviour ────────────────────────────────── + + +def test_main_returns_zero_on_no_baseline_paths() -> None: + rc: int = _MODULE.main(["check_baseline_growth.py", "src/foo.py"]) + assert rc == 0 + + +def test_main_bypass_via_env(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("ALLOW_BASELINE_GROWTH", "1") + rc: int = _MODULE.main( + ["check_baseline_growth.py", "scripts/mock_spec_baseline.txt"], + ) + assert rc == 0 + + +def test_main_detects_growth( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) + with ( + patch.object( + _MODULE, + "_read_staged", + return_value="# header\nsrc/a.py:1:1\nsrc/b.py:2:2\nsrc/c.py:3:3\n", + ), + patch.object( + _MODULE, + "_read_head", + return_value="# header\nsrc/a.py:1:1\n", + ), + ): + rc: int = _MODULE.main( + ["check_baseline_growth.py", "scripts/fake_baseline.txt"], + ) + assert rc == 1 + captured = capsys.readouterr() + assert "fake_baseline.txt" in captured.err + assert "1 -> 3" in captured.err + assert "ALLOW_BASELINE_GROWTH=1" in captured.err + + +def test_main_allows_shrink(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) + with ( + patch.object(_MODULE, "_read_staged", return_value="# header\nsrc/a.py:1:1\n"), + patch.object( + _MODULE, + "_read_head", + return_value="# header\nsrc/a.py:1:1\nsrc/b.py:2:2\nsrc/c.py:3:3\n", + ), + ): + rc: int = _MODULE.main( + ["check_baseline_growth.py", "scripts/fake_baseline.txt"], + ) + assert rc == 0 + + +def test_main_treats_missing_head_as_empty( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) + with ( + patch.object(_MODULE, "_read_staged", return_value="src/a.py:1:1\n"), + patch.object(_MODULE, "_read_head", return_value=None), + ): + rc: int = _MODULE.main( + ["check_baseline_growth.py", "scripts/new_baseline.txt"], + ) + assert rc == 1 + + +def test_read_head_returns_none_for_missing(monkeypatch: pytest.MonkeyPatch) -> None: + def fake_run( + cmd: list[str], + **kwargs: object, + ) -> subprocess.CompletedProcess[str]: + del kwargs + return subprocess.CompletedProcess( + args=cmd, + returncode=128, + stdout="", + stderr="", + ) + + monkeypatch.setattr(_MODULE.subprocess, "run", fake_run) + assert _MODULE._read_head("scripts/does_not_exist.txt") is None + + +def test_read_head_returns_none_when_git_binary_missing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + def fake_run( + cmd: list[str], + **kwargs: object, + ) -> subprocess.CompletedProcess[str]: + del cmd, kwargs + msg = "git binary not on PATH" + raise FileNotFoundError(msg) + + monkeypatch.setattr(_MODULE.subprocess, "run", fake_run) + assert _MODULE._read_head("scripts/does_not_exist.txt") is None + + +def test_read_head_warns_on_unexpected_oserror( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + def fake_run( + cmd: list[str], + **kwargs: object, + ) -> subprocess.CompletedProcess[str]: + del cmd, kwargs + msg = "git denied" + raise PermissionError(msg) + + monkeypatch.setattr(_MODULE.subprocess, "run", fake_run) + assert _MODULE._read_head("scripts/some_baseline.txt") is None + captured = capsys.readouterr() + assert "git show failed" in captured.err + assert "some_baseline.txt" in captured.err + + +def test_main_blocks_invalid_json_baseline( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) + with ( + patch.object( + _MODULE, + "_read_staged", + return_value="{ this is not valid json ", + ), + patch.object(_MODULE, "_read_head", return_value="{}"), + ): + rc: int = _MODULE.main( + ["check_baseline_growth.py", "scripts/_fake_baseline.json"], + ) + assert rc == _MODULE.EXIT_INVALID_BASELINE + captured = capsys.readouterr() + assert "_fake_baseline.json" in captured.err + assert "failed to parse" in captured.err + + +def test_main_skips_when_path_not_in_index( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """A baseline path missing from the git index is silently skipped. + + Pre-commit cannot reach this case in practice because the file is staged, + but ``git show :`` will return non-zero (and ``_read_staged`` will + return ``None``) for a path that is not in the index. The gate has + nothing to compare against, so it skips. + """ + monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) + with patch.object(_MODULE, "_read_staged", return_value=None): + rc: int = _MODULE.main( + ["check_baseline_growth.py", "scripts/missing_baseline.txt"], + ) + assert rc == 0 + + +def test_inspect_path_warns_on_corrupt_head_baseline( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + """A corrupt HEAD baseline emits a stderr warning before falling back to 0. + + Silent fallback masked HEAD corruption; the warning makes the over-strict + growth check visible without changing the safe behaviour (head_count=0 + still rejects any non-empty staged baseline as growth). + """ + monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) + with ( + patch.object( + _MODULE, + "_read_staged", + return_value=json.dumps({"locations": {"a": 1}}), + ), + patch.object( + _MODULE, + "_read_head", + return_value="{ this is not valid json ", + ), + ): + rc: int = _MODULE.main( + [ + "check_baseline_growth.py", + "scripts/_corrupt_head_baseline.json", + ], + ) + assert rc == _MODULE.EXIT_GROWTH_DETECTED + captured = capsys.readouterr() + assert "WARNING: HEAD baseline" in captured.err + assert "_corrupt_head_baseline.json" in captured.err + assert "failed to parse" in captured.err + + +def test_main_handles_multiple_paths_mixed_states( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) + staged_lookup = { + "scripts/grew_baseline.txt": "a\nb\nc\nd\n", + "scripts/shrank_baseline.txt": "a\n", + "scripts/missing_baseline.txt": None, + } + head_lookup = { + "scripts/grew_baseline.txt": "a\nb\n", + "scripts/shrank_baseline.txt": "a\nb\nc\n", + "scripts/missing_baseline.txt": None, + } + + def fake_read_staged(path: str) -> str | None: + return staged_lookup[path] + + def fake_read_head(path: str) -> str | None: + return head_lookup[path] + + with ( + patch.object(_MODULE, "_read_staged", side_effect=fake_read_staged), + patch.object(_MODULE, "_read_head", side_effect=fake_read_head), + ): + rc: int = _MODULE.main( + [ + "check_baseline_growth.py", + "scripts/grew_baseline.txt", + "scripts/shrank_baseline.txt", + "scripts/missing_baseline.txt", + ], + ) + assert rc == 1 + captured = capsys.readouterr() + assert "grew_baseline.txt" in captured.err + assert "shrank_baseline.txt" not in captured.err + assert "missing_baseline.txt" not in captured.err + + +# ── _read_staged ──────────────────────────────────────────────── + + +def test_read_staged_returns_none_for_path_not_in_index( + monkeypatch: pytest.MonkeyPatch, +) -> None: + def fake_run( + cmd: list[str], + **kwargs: object, + ) -> subprocess.CompletedProcess[str]: + del kwargs + return subprocess.CompletedProcess( + args=cmd, + returncode=128, + stdout="", + stderr="fatal: path 'scripts/x.txt' does not exist in the index", + ) + + monkeypatch.setattr(_MODULE.subprocess, "run", fake_run) + assert _MODULE._read_staged("scripts/x.txt") is None + + +def test_read_staged_returns_none_when_git_binary_missing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + def fake_run( + cmd: list[str], + **kwargs: object, + ) -> subprocess.CompletedProcess[str]: + del cmd, kwargs + msg = "git binary not on PATH" + raise FileNotFoundError(msg) + + monkeypatch.setattr(_MODULE.subprocess, "run", fake_run) + assert _MODULE._read_staged("scripts/x.txt") is None + + +def test_read_staged_warns_on_unexpected_oserror( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + def fake_run( + cmd: list[str], + **kwargs: object, + ) -> subprocess.CompletedProcess[str]: + del cmd, kwargs + msg = "git denied" + raise PermissionError(msg) + + monkeypatch.setattr(_MODULE.subprocess, "run", fake_run) + assert _MODULE._read_staged("scripts/some_baseline.txt") is None + captured = capsys.readouterr() + assert "git show" in captured.err + assert "some_baseline.txt" in captured.err + + +def test_read_staged_returns_blob_content( + monkeypatch: pytest.MonkeyPatch, +) -> None: + def fake_run( + cmd: list[str], + **kwargs: object, + ) -> subprocess.CompletedProcess[str]: + del kwargs + return subprocess.CompletedProcess( + args=cmd, + returncode=0, + stdout="src/a.py:1:1\nsrc/b.py:2:2\n", + stderr="", + ) + + monkeypatch.setattr(_MODULE.subprocess, "run", fake_run) + assert ( + _MODULE._read_staged("scripts/mock_spec_baseline.txt") + == "src/a.py:1:1\nsrc/b.py:2:2\n" + ) diff --git a/tests/unit/scripts/test_check_no_edit_baseline.py b/tests/unit/scripts/test_check_no_edit_baseline.py new file mode 100644 index 0000000000..c3f6ebf60c --- /dev/null +++ b/tests/unit/scripts/test_check_no_edit_baseline.py @@ -0,0 +1,156 @@ +"""Unit tests for ``scripts/check_no_edit_baseline.sh``. + +The hook is a Bash PreToolUse intercept that fires before Edit/Write lands +on a protected baseline file. We invoke it as a subprocess feeding mock +JSON envelopes on stdin and assert on exit codes + stdout / stderr. +""" + +import json +import shutil +import subprocess +from pathlib import Path + +import pytest + +pytestmark = pytest.mark.unit + +_REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent +_SCRIPT = _REPO_ROOT / "scripts" / "check_no_edit_baseline.sh" + +_BASH = shutil.which("bash") +_BASH_AVAILABLE = pytest.mark.skipif(_BASH is None, reason="bash not available") + + +def _run(envelope: dict[str, object]) -> subprocess.CompletedProcess[str]: + assert _BASH is not None + return subprocess.run( # noqa: S603 + [_BASH, str(_SCRIPT)], + input=json.dumps(envelope), + capture_output=True, + text=True, + check=False, + encoding="utf-8", + ) + + +@_BASH_AVAILABLE +def test_blocks_edit_of_text_baseline_posix() -> None: + result = _run( + { + "tool_input": { + "file_path": "scripts/mock_spec_baseline.txt", + "new_string": "src/x.py:1:1\n", + }, + }, + ) + assert result.returncode == 2 + + +@_BASH_AVAILABLE +def test_blocks_edit_of_json_baseline_posix() -> None: + result = _run( + { + "tool_input": { + "file_path": "scripts/_workflow_shell_git_commits_baseline.json", + "new_string": "{}", + }, + }, + ) + assert result.returncode == 2 + + +@_BASH_AVAILABLE +def test_blocks_edit_of_test_timing_baseline_posix() -> None: + result = _run( + { + "tool_input": { + "file_path": "tests/baselines/unit_timing.json", + "new_string": "{}", + }, + }, + ) + assert result.returncode == 2 + + +@_BASH_AVAILABLE +def test_blocks_edit_of_python_baseline_posix() -> None: + """Python-format gate baselines (``scripts/_*_baseline.py``) are protected too.""" + result = _run( + { + "tool_input": { + "file_path": "scripts/_workflow_shell_git_commits_baseline.py", + "new_string": "[]", + }, + }, + ) + assert result.returncode == 2 + + +@_BASH_AVAILABLE +def test_allows_non_baseline_path() -> None: + result = _run( + { + "tool_input": { + "file_path": "src/synthorg/foo.py", + "new_string": "x = 1", + }, + }, + ) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +@pytest.mark.parametrize( + "windows_path", + [ + r"C:\repo\scripts\mock_spec_baseline.txt", + r"scripts\mock_spec_baseline.txt", + r"D:\projects\synthorg\scripts\_workflow_shell_git_commits_baseline.json", + r"D:\projects\synthorg\scripts\_workflow_shell_git_commits_baseline.py", + r"C:\repo\tests\baselines\unit_timing.json", + ], +) +def test_blocks_edit_of_baseline_via_windows_path(windows_path: str) -> None: + """Windows-style backslash paths must hit the same protection as POSIX. + + Without normalisation, the case patterns only match ``/``-separated paths + and a Windows path like ``C:\\repo\\scripts\\mock_spec_baseline.txt`` + would slip past the gate. The hook normalises backslashes before the + case match. + """ + result = _run( + { + "tool_input": { + "file_path": windows_path, + "new_string": "src/x.py:1:1\n", + }, + }, + ) + assert result.returncode == 2 + + +@_BASH_AVAILABLE +def test_allows_when_no_file_path() -> None: + result = _run({"tool_input": {}}) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +def test_allows_malformed_json_envelope() -> None: + """Malformed stdin must not crash the hook; falls through to allow. + + Choosing fail-closed (block) here would brick Edit/Write across the + entire session if jq itself misbehaved or the harness sent a non-JSON + envelope. Allowing through is the deliberate posture: the in-repo + pre-commit baseline-growth gate is the diff-time backstop. + """ + assert _BASH is not None + result = subprocess.run( # noqa: S603 + [_BASH, str(_SCRIPT)], + input="this is not json at all { ", + capture_output=True, + text=True, + check=False, + encoding="utf-8", + ) + assert result.returncode == 0 diff --git a/tests/unit/scripts/test_check_no_em_dashes_hook.py b/tests/unit/scripts/test_check_no_em_dashes_hook.py new file mode 100644 index 0000000000..c2953a9323 --- /dev/null +++ b/tests/unit/scripts/test_check_no_em_dashes_hook.py @@ -0,0 +1,193 @@ +"""Unit tests for ``scripts/check_no_em_dashes_hook.sh``. + +The hook is a Bash PreToolUse intercept that fires before Edit/Write lands. +We invoke it as a subprocess feeding mock JSON envelopes on stdin and +assert on exit codes + stdout / stderr. + +The em-dash patterns are constructed at runtime via ``chr(0x2014)`` and +string concatenation so this source file itself stays free of any +literal blocked pattern (otherwise the hook would block edits to this +test file -- the same trick is used in scripts/check_no_em_dashes.py). +""" + +import json +import shutil +import subprocess +from pathlib import Path + +import pytest + +pytestmark = pytest.mark.unit + +_REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent +_SCRIPT = _REPO_ROOT / "scripts" / "check_no_em_dashes_hook.sh" + +_BASH = shutil.which("bash") +_BASH_AVAILABLE = pytest.mark.skipif(_BASH is None, reason="bash not available") + +_EM_DASH = chr(0x2014) +_HTML_NAMED = "&" + "mdash;" +_HTML_DEC = "&" + "#8212;" +_HTML_HEX = "&" + "#x2014;" + + +def _run(envelope: dict[str, object]) -> subprocess.CompletedProcess[str]: + assert _BASH is not None + return subprocess.run( # noqa: S603 + [_BASH, str(_SCRIPT)], + input=json.dumps(envelope), + capture_output=True, + text=True, + check=False, + encoding="utf-8", + ) + + +@_BASH_AVAILABLE +def test_blocks_em_dash_in_write_content() -> None: + result = _run( + { + "tool_input": { + "file_path": "src/foo.py", + "content": f"hello {_EM_DASH} world", + }, + }, + ) + assert result.returncode == 2 + assert "permissionDecision" in result.stdout + assert "deny" in result.stdout + + +@_BASH_AVAILABLE +def test_blocks_em_dash_in_edit_new_string() -> None: + result = _run( + { + "tool_input": { + "file_path": "src/foo.py", + "old_string": "x", + "new_string": f"hello {_EM_DASH} world", + }, + }, + ) + assert result.returncode == 2 + + +@_BASH_AVAILABLE +@pytest.mark.parametrize( + "entity", + [_HTML_NAMED, _HTML_DEC, _HTML_HEX], +) +def test_blocks_html_entities(entity: str) -> None: + result = _run( + {"tool_input": {"file_path": "src/foo.html", "content": f"foo {entity} bar"}}, + ) + assert result.returncode == 2 + + +@_BASH_AVAILABLE +def test_allows_ascii_hyphen() -> None: + result = _run( + {"tool_input": {"file_path": "src/foo.py", "content": "hello - world"}}, + ) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +def test_allows_double_hyphen() -> None: + result = _run( + {"tool_input": {"file_path": "src/foo.py", "content": "foo -- bar"}}, + ) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +def test_skips_changelog_md() -> None: + result = _run( + { + "tool_input": { + "file_path": ".github/CHANGELOG.md", + "content": f"hello {_EM_DASH} world", + }, + }, + ) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +def test_skips_changelog_md_absolute_path() -> None: + result = _run( + { + "tool_input": { + "file_path": "/absolute/.github/CHANGELOG.md", + "content": f"hello {_EM_DASH} world", + }, + }, + ) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +@pytest.mark.parametrize( + "windows_path", + [ + r"C:\repo\.github\CHANGELOG.md", + r".github\CHANGELOG.md", + r"D:\projects\synthorg\.github\CHANGELOG.md", + ], +) +def test_skips_changelog_md_windows_path(windows_path: str) -> None: + """Windows-style backslash paths must hit the same exemption as POSIX. + + Without normalisation, the case pattern only matches ``/``-separated paths + and a Windows path like ``C:\\repo\\.github\\CHANGELOG.md`` would be blocked + even though changelog writes are exempt. The hook normalises backslashes + to forward slashes before the case match. + """ + result = _run( + { + "tool_input": { + "file_path": windows_path, + "content": f"hello {_EM_DASH} world", + }, + }, + ) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +def test_allows_empty_content() -> None: + result = _run({"tool_input": {"file_path": "src/foo.py", "content": ""}}) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +def test_allows_when_no_file_path() -> None: + result = _run({"tool_input": {}}) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +def test_allows_when_tool_input_missing() -> None: + result = _run({}) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +def test_allows_malformed_json_envelope() -> None: + """Malformed stdin must not crash the hook; falls through to allow. + + Choosing fail-closed (block) here would brick Edit/Write across the entire + session if jq itself misbehaved or the harness sent a non-JSON envelope. + Allowing through is the deliberate posture: the pre-commit gate + `scripts/check_no_em_dashes.py` is the diff-time backstop. + """ + assert _BASH is not None + result = subprocess.run( # noqa: S603 + [_BASH, str(_SCRIPT)], + input="this is not json at all { ", + capture_output=True, + text=True, + check=False, + encoding="utf-8", + ) + assert result.returncode == 0