From d5d3bbf6b862801c1aabdc20a6eee42ee76c9210 Mon Sep 17 00:00:00 2001 From: Aurelio <19254254+Aureliolo@users.noreply.github.com> Date: Sun, 10 May 2026 10:32:11 +0200 Subject: [PATCH 1/6] feat: harden gate baseline protection against silent debt growth --- .claude/settings.json | 5 + .opencode/plugins/synthorg-hooks.ts | 17 ++ .pre-commit-config.yaml | 7 + scripts/check_baseline_growth.py | 139 +++++++++++ scripts/check_no_baseline_update.sh | 47 ++++ scripts/check_no_edit_baseline.sh | 25 +- .../scripts/test_check_baseline_growth.py | 217 ++++++++++++++++++ 7 files changed, 451 insertions(+), 6 deletions(-) create mode 100644 scripts/check_baseline_growth.py create mode 100755 scripts/check_no_baseline_update.sh create mode 100644 tests/unit/scripts/test_check_baseline_growth.py diff --git a/.claude/settings.json b/.claude/settings.json index 0ab6b723bb..5ffd6cf29a 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -29,6 +29,11 @@ "command": "bash scripts/check_no_atlas_rehash.sh", "timeout": 5000 }, + { + "type": "command", + "command": "bash scripts/check_no_baseline_update.sh", + "timeout": 5000 + }, { "type": "command", "command": "bash scripts/check_bash_no_write.sh", diff --git a/.opencode/plugins/synthorg-hooks.ts b/.opencode/plugins/synthorg-hooks.ts index 4a6d1946f7..f66c6d5dd9 100644 --- a/.opencode/plugins/synthorg-hooks.ts +++ b/.opencode/plugins/synthorg-hooks.ts @@ -7,6 +7,7 @@ * Committed Claude Code hooks (from .claude/settings.json): * PreToolUse (Bash): scripts/check_push_rebased.sh * PreToolUse (Bash): scripts/check_no_atlas_rehash.sh + * PreToolUse (Bash): scripts/check_no_baseline_update.sh * PreToolUse (Bash): scripts/check_bash_no_write.sh * PreToolUse (Bash): scripts/check_git_c_cwd.sh * PreToolUse (Bash | Edit): scripts/check_no_bulk_edit.py @@ -300,6 +301,22 @@ export const SynthOrgHooks: Plugin = async ({ client, $, app }) => { } } + // check_no_baseline_update.sh: block --update-baseline / + // --refresh-baseline invocations on gate scripts. Like the atlas + // hook above we invoke unconditionally because aliases / + // subprocess wrappers could hide the literal flag tokens. + { + const outcome = runHookScript( + "scripts/check_no_baseline_update.sh", + { command }, + 5000, + ); + const denyReason = denyReasonFromOutcome(outcome); + if (denyReason) { + throw new Error(denyReason); + } + } + // check_bash_no_write.sh: block file writes via Bash const bashWriteOutcome = runHookScript( "scripts/check_bash_no_write.sh", diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0501085d99..b4ddf712d8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -215,6 +215,13 @@ repos: language: system files: ^tests/.*\.py$ + - id: baseline-growth + name: block growth of gate suppression baselines + entry: uv + args: [run, python, scripts/check_baseline_growth.py] + language: system + files: ^scripts/(_?[a-z_]+_baseline\.(txt|json|py))$ + - id: no-release-please-token name: forbid new RELEASE_PLEASE_TOKEN references entry: uv diff --git a/scripts/check_baseline_growth.py b/scripts/check_baseline_growth.py new file mode 100644 index 0000000000..26f73610f8 --- /dev/null +++ b/scripts/check_baseline_growth.py @@ -0,0 +1,139 @@ +"""Block commits that grow gate-suppression baselines. + +Gate suppression baselines (``scripts/*_baseline.txt``, +``scripts/*_baseline.json``, ``scripts/_*_baseline.py``) record pre-existing +violations that the corresponding ``check_*`` gate is allowed to skip. The +baselines are intended to shrink monotonically -- a PR that makes the codebase +worse by silencing a new violation should fail the gate, not amend the +baseline. + +This pre-commit hook diffs each baseline file against ``HEAD`` and rejects +the commit if the staged version contains *more* entries than the committed +version. Shrinking and rewording are allowed; growing is not. + +Bypass (rare; requires explicit user approval): ``SKIP=baseline-growth git +commit ...`` or set ``ALLOW_BASELINE_GROWTH=1`` in the environment. +""" + +import json +import os +import subprocess +import sys +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parent.parent +EXIT_OK = 0 +EXIT_GROWTH_DETECTED = 1 + + +def _is_baseline_path(path: str) -> bool: + """Return ``True`` for paths that this gate should compare against HEAD.""" + if not path.startswith("scripts/"): + return False + name = path[len("scripts/") :] + if "/" in name: + return False + if name.endswith(("_baseline.txt", "_baseline.json")): + return True + return name.startswith("_") and name.endswith("_baseline.py") + + +def _count_json_entries(text: str) -> int: + try: + payload = json.loads(text) + except json.JSONDecodeError: + return -1 + if isinstance(payload, dict): + locations = payload.get("locations") + if isinstance(locations, dict): + return len(locations) + if isinstance(locations, list): + return len(locations) + return 0 + if isinstance(payload, list): + return len(payload) + return 0 + + +def _count_text_entries(text: str) -> int: + return sum( + 1 + for line in text.splitlines() + if line.strip() and not line.lstrip().startswith("#") + ) + + +def _staged_entries(text: str, suffix: str) -> int: + """Count the entries in a baseline file's content.""" + if suffix == ".json": + return _count_json_entries(text) + return _count_text_entries(text) + + +def _read_head(path: str) -> str | None: + """Return the file at ``HEAD`` or ``None`` if it did not exist there.""" + try: + result = subprocess.run( + ["git", "show", f"HEAD:{path}"], + check=False, + capture_output=True, + text=True, + ) + except OSError: + return None + if result.returncode != 0: + return None + return result.stdout + + +def _classify(path: str) -> str: + if path.endswith(".json"): + return ".json" + if path.endswith(".py"): + return ".py" + return ".txt" + + +def main(argv: list[str]) -> int: + """Compare staged baseline files against HEAD; reject growth.""" + if os.environ.get("ALLOW_BASELINE_GROWTH") == "1": + return EXIT_OK + paths = [p for p in argv[1:] if _is_baseline_path(p)] + if not paths: + return EXIT_OK + grown: list[tuple[str, int, int]] = [] + for path in paths: + suffix = _classify(path) + absolute = REPO_ROOT / path + try: + staged_text = absolute.read_text(encoding="utf-8") + except OSError: + continue + head_text = _read_head(path) + head_count = _staged_entries(head_text, suffix) if head_text is not None else 0 + staged_count = _staged_entries(staged_text, suffix) + if staged_count > head_count: + grown.append((path, head_count, staged_count)) + if not grown: + return EXIT_OK + print( + "Gate suppression baselines must shrink monotonically. " + "These files have more entries than HEAD:", + file=sys.stderr, + ) + for path, head_count, staged_count in grown: + print( + f" {path}: {head_count} -> {staged_count} (+{staged_count - head_count})", + file=sys.stderr, + ) + print( + "\nFix the source instead, or ask the user before adding a new exception.\n" + "Bypass (requires explicit user approval): " + "ALLOW_BASELINE_GROWTH=1 git commit ...", + file=sys.stderr, + ) + return EXIT_GROWTH_DETECTED + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/scripts/check_no_baseline_update.sh b/scripts/check_no_baseline_update.sh new file mode 100755 index 0000000000..fa88b14df2 --- /dev/null +++ b/scripts/check_no_baseline_update.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# PreToolUse hook: block gate-baseline regeneration commands. +# +# `python scripts/check_.py --update-baseline` (and the older `--update` +# flag on a few gates) silently rewrites a baseline file to absorb new +# violations. This is the exact "grow gate-suppression debt" path we want +# to stop happening autonomously. +# +# Regeneration is rare and requires explicit user approval. If the user +# decides a regeneration is warranted, they can run the command themselves +# in a `! `-prefixed bash invocation, or temporarily disable this hook. +# +# Exit behavior: +# - Non-update-baseline commands: exit 0 (allow) +# - --update-baseline / --update on a check_*.py: print JSON, exit 2 + +set -euo pipefail + +if ! COMMAND=$(jq -r '.tool_input.command // ""' 2>/dev/null); then + exit 0 +fi +if [[ -z "$COMMAND" ]]; then + exit 0 +fi + +# Match patterns: +# scripts/check_.py --update-baseline +# scripts/check_.py --update +# python ... check_.py ... --update-baseline / --update +# uv run python ... check_.py ... --update-baseline / --update +# python scripts/check_.py --refresh-baseline +if [[ "$COMMAND" =~ scripts/check_[a-z_]+\.py.*(--update-baseline|--refresh-baseline) ]] || \ + [[ "$COMMAND" =~ scripts/check_[a-z_]+\.py.*--update($|[[:space:]]) ]]; then + REASON="Gate baseline regeneration is not autonomously allowed. Adding a new exception requires explicit user approval. Fix the source instead, or ask the user before running --update-baseline / --update / --refresh-baseline." + cat < ModuleType: + spec = importlib.util.spec_from_file_location( + "_check_baseline_growth", + _SCRIPT_PATH, + ) + assert spec is not None + assert spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +_MODULE: Any = cast("Any", _load_script_module()) + + +# ── path classification ───────────────────────────────────────── + + +@pytest.mark.parametrize( + ("path", "expected"), + [ + ("scripts/mock_spec_baseline.txt", True), + ("scripts/no_magic_numbers_baseline.txt", True), + ("scripts/_workflow_shell_git_commits_baseline.json", True), + ("scripts/_schema_drift_baseline.py", True), + ("scripts/check_mock_spec.py", False), + ("scripts/check_no_edit_baseline.sh", False), + ("scripts/no_baseline_at_all.txt", False), + ("tests/baselines/unit_timing.json", False), + ("scripts/subdir/mock_spec_baseline.txt", False), + ("README.md", False), + ], +) +def test_is_baseline_path(path: str, expected: bool) -> None: + assert _MODULE._is_baseline_path(path) is expected + + +# ── entry counting ────────────────────────────────────────────── + + +def test_count_text_entries_skips_comments_and_blanks() -> None: + text = ( + "# header line\n" + "# another comment\n" + "\n" + "src/a.py:10:5\n" + "src/b.py:20:7\n" + " \n" + "src/c.py:30:9\n" + ) + assert _MODULE._count_text_entries(text) == 3 + + +def test_count_json_entries_locations_dict() -> None: + text = json.dumps( + {"description": "x", "locations": {"a": 1, "b": 2, "c": 3}}, + ) + assert _MODULE._count_json_entries(text) == 3 + + +def test_count_json_entries_locations_list() -> None: + text = json.dumps({"locations": ["a", "b"]}) + assert _MODULE._count_json_entries(text) == 2 + + +def test_count_json_entries_top_level_list() -> None: + text = json.dumps([1, 2, 3, 4]) + assert _MODULE._count_json_entries(text) == 4 + + +def test_count_json_entries_invalid_returns_negative() -> None: + assert _MODULE._count_json_entries("not json") == -1 + + +def test_count_json_entries_no_locations_dict() -> None: + assert _MODULE._count_json_entries(json.dumps({"foo": "bar"})) == 0 + + +# ── classification ────────────────────────────────────────────── + + +@pytest.mark.parametrize( + ("path", "expected"), + [ + ("scripts/foo_baseline.txt", ".txt"), + ("scripts/foo_baseline.json", ".json"), + ("scripts/_foo_baseline.py", ".py"), + ], +) +def test_classify(path: str, expected: str) -> None: + assert _MODULE._classify(path) == expected + + +# ── main: end-to-end behaviour ────────────────────────────────── + + +def test_main_returns_zero_on_no_baseline_paths() -> None: + rc: int = _MODULE.main(["check_baseline_growth.py", "src/foo.py"]) + assert rc == 0 + + +def test_main_bypass_via_env(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("ALLOW_BASELINE_GROWTH", "1") + rc: int = _MODULE.main( + ["check_baseline_growth.py", "scripts/mock_spec_baseline.txt"], + ) + assert rc == 0 + + +def test_main_detects_growth( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + baseline = tmp_path / "scripts" / "fake_baseline.txt" + baseline.parent.mkdir(parents=True) + baseline.write_text( + "# header\nsrc/a.py:1:1\nsrc/b.py:2:2\nsrc/c.py:3:3\n", + encoding="utf-8", + ) + monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) + monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) + with patch.object( + _MODULE, + "_read_head", + return_value="# header\nsrc/a.py:1:1\n", + ): + rc: int = _MODULE.main( + ["check_baseline_growth.py", "scripts/fake_baseline.txt"], + ) + assert rc == 1 + captured = capsys.readouterr() + assert "fake_baseline.txt" in captured.err + assert "1 -> 3" in captured.err + assert "ALLOW_BASELINE_GROWTH=1" in captured.err + + +def test_main_allows_shrink( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + baseline = tmp_path / "scripts" / "fake_baseline.txt" + baseline.parent.mkdir(parents=True) + baseline.write_text( + "# header\nsrc/a.py:1:1\n", + encoding="utf-8", + ) + monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) + monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) + with patch.object( + _MODULE, + "_read_head", + return_value="# header\nsrc/a.py:1:1\nsrc/b.py:2:2\nsrc/c.py:3:3\n", + ): + rc: int = _MODULE.main( + ["check_baseline_growth.py", "scripts/fake_baseline.txt"], + ) + assert rc == 0 + + +def test_main_treats_missing_head_as_empty( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + baseline = tmp_path / "scripts" / "new_baseline.txt" + baseline.parent.mkdir(parents=True) + baseline.write_text("src/a.py:1:1\n", encoding="utf-8") + monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) + monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) + with patch.object( + _MODULE, + "_read_head", + return_value=None, + ): + rc: int = _MODULE.main( + ["check_baseline_growth.py", "scripts/new_baseline.txt"], + ) + assert rc == 1 + + +def test_read_head_returns_none_for_missing(monkeypatch: pytest.MonkeyPatch) -> None: + def fake_run( + cmd: list[str], + **kwargs: object, + ) -> subprocess.CompletedProcess[str]: + del kwargs + return subprocess.CompletedProcess( + args=cmd, + returncode=128, + stdout="", + stderr="", + ) + + monkeypatch.setattr(_MODULE.subprocess, "run", fake_run) + assert _MODULE._read_head("scripts/does_not_exist.txt") is None From 0ffaaa6bb1122f4e39d0645366985bf774d160fe Mon Sep 17 00:00:00 2001 From: Aurelio <19254254+Aureliolo@users.noreply.github.com> Date: Sun, 10 May 2026 11:15:21 +0200 Subject: [PATCH 2/6] feat: block em-dashes at write time via PreToolUse hook --- .claude/settings.json | 5 + .opencode/plugins/synthorg-hooks.ts | 25 ++++ scripts/check_no_em_dashes_hook.sh | 70 +++++++++ .../scripts/test_check_no_em_dashes_hook.py | 138 ++++++++++++++++++ 4 files changed, 238 insertions(+) create mode 100755 scripts/check_no_em_dashes_hook.sh create mode 100644 tests/unit/scripts/test_check_no_em_dashes_hook.py diff --git a/.claude/settings.json b/.claude/settings.json index 5ffd6cf29a..39acc8078c 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -74,6 +74,11 @@ "command": "bash scripts/check_no_edit_baseline.sh", "timeout": 5000 }, + { + "type": "command", + "command": "bash scripts/check_no_em_dashes_hook.sh", + "timeout": 5000 + }, { "type": "command", "command": "bash scripts/check_pre_pr_review_triage_gate.sh", diff --git a/.opencode/plugins/synthorg-hooks.ts b/.opencode/plugins/synthorg-hooks.ts index f66c6d5dd9..c836c20be4 100644 --- a/.opencode/plugins/synthorg-hooks.ts +++ b/.opencode/plugins/synthorg-hooks.ts @@ -13,6 +13,7 @@ * PreToolUse (Bash | Edit): scripts/check_no_bulk_edit.py * PreToolUse (Edit|Write): scripts/check_no_edit_migration.sh * PreToolUse (Edit|Write): scripts/check_no_edit_baseline.sh + * PreToolUse (Edit|Write): scripts/check_no_em_dashes_hook.sh * PreToolUse (Edit|Write): scripts/check_pre_pr_review_triage_gate.sh * PostToolUse (Edit|Write): scripts/check_web_design_system.py * PostToolUse (Edit|Write): scripts/check_backend_regional_defaults.py @@ -211,6 +212,30 @@ export const SynthOrgHooks: Plugin = async ({ client, $, app }) => { throw new Error(denyReason); } } + + // check_no_em_dashes_hook.sh: inspects the candidate content + // (`.tool_input.content` for Write, `.tool_input.new_string` for + // Edit) so the em-dash never lands on disk. Mirrors the + // pre-commit gate in scripts/check_no_em_dashes.py. + { + const args = (output.args ?? {}) as Record; + const emDashInput: Record = { file_path: filePath }; + if (typeof args.content === "string") { + emDashInput.content = args.content; + } + if (typeof args.new_string === "string") { + emDashInput.new_string = args.new_string; + } + const outcome = runHookScript( + "scripts/check_no_em_dashes_hook.sh", + emDashInput, + 5000, + ); + const denyReason = denyReasonFromOutcome(outcome); + if (denyReason) { + throw new Error(denyReason); + } + } } // Only the remaining bash / shell checks apply below diff --git a/scripts/check_no_em_dashes_hook.sh b/scripts/check_no_em_dashes_hook.sh new file mode 100755 index 0000000000..6d2cd9c77b --- /dev/null +++ b/scripts/check_no_em_dashes_hook.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# PreToolUse hook (Edit|Write): block em-dashes BEFORE the write lands. +# +# The pre-commit gate scripts/check_no_em_dashes.py catches em-dashes after +# the fact, which means a writer-then-reviewer round-trip per offence. This +# hook fires earlier, on the Edit/Write tool call itself, so the violation +# never reaches disk. +# +# Patterns blocked: +# - U+2014 EM DASH (the literal character) +# - The HTML named, decimal, and hex entities for U+2014. +# (Patterns are reconstructed at runtime via shell concatenation so this +# script's source stays clear of every literal blocked pattern -- mirrors +# the same trick in scripts/check_no_em_dashes.py.) +# +# Excluded paths (mirrored from check_no_em_dashes.py): +# .github/CHANGELOG.md -- regenerated from history by release-please. +# +# Exit behaviour: +# - No em-dash in the candidate content: exit 0 (allow) +# - Em-dash detected: print JSON deny envelope, exit 2 + +set -euo pipefail + +if ! INPUT=$(cat); then + exit 0 +fi +if [[ -z "$INPUT" ]]; then + exit 0 +fi + +FILE_PATH=$(jq -r '.tool_input.file_path // ""' <<<"$INPUT" 2>/dev/null || true) +if [[ -z "$FILE_PATH" ]]; then + exit 0 +fi + +case "$FILE_PATH" in + */.github/CHANGELOG.md|.github/CHANGELOG.md) + exit 0 + ;; +esac + +# Write tool: .tool_input.content +# Edit tool: .tool_input.new_string +CONTENT=$(jq -r '.tool_input.content // .tool_input.new_string // ""' <<<"$INPUT" 2>/dev/null || true) +if [[ -z "$CONTENT" ]]; then + exit 0 +fi + +EM_DASH=$'\xe2\x80\x94' +AMP='&' +HTML_NAMED="${AMP}mdash;" +HTML_DEC="${AMP}#8212;" +HTML_HEX="${AMP}#x2014;" + +if grep -qF -e "$EM_DASH" -e "$HTML_NAMED" -e "$HTML_DEC" -e "$HTML_HEX" <<<"$CONTENT"; then + REASON="Em-dash (U+2014) detected in the content being written. Replace with the ASCII punctuation that matches the sentence: ':' (colon), ';' (semicolon), ',' (comma), '.' (period), '( ... )' (parens), or '-' (hyphen, compound only). Bare '--' is almost never right; prefer one of the above or rewrite." + cat < subprocess.CompletedProcess[str]: + assert _BASH is not None + return subprocess.run( # noqa: S603 + [_BASH, str(_SCRIPT)], + input=json.dumps(envelope), + capture_output=True, + text=True, + check=False, + encoding="utf-8", + ) + + +@_BASH_AVAILABLE +def test_blocks_em_dash_in_write_content() -> None: + result = _run( + { + "tool_input": { + "file_path": "src/foo.py", + "content": f"hello {_EM_DASH} world", + }, + }, + ) + assert result.returncode == 2 + assert "permissionDecision" in result.stdout + assert "deny" in result.stdout + + +@_BASH_AVAILABLE +def test_blocks_em_dash_in_edit_new_string() -> None: + result = _run( + { + "tool_input": { + "file_path": "src/foo.py", + "old_string": "x", + "new_string": f"hello {_EM_DASH} world", + }, + }, + ) + assert result.returncode == 2 + + +@_BASH_AVAILABLE +@pytest.mark.parametrize( + "entity", + [_HTML_NAMED, _HTML_DEC, _HTML_HEX], +) +def test_blocks_html_entities(entity: str) -> None: + result = _run( + {"tool_input": {"file_path": "src/foo.html", "content": f"foo {entity} bar"}}, + ) + assert result.returncode == 2 + + +@_BASH_AVAILABLE +def test_allows_ascii_hyphen() -> None: + result = _run( + {"tool_input": {"file_path": "src/foo.py", "content": "hello - world"}}, + ) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +def test_allows_double_hyphen() -> None: + result = _run( + {"tool_input": {"file_path": "src/foo.py", "content": "foo -- bar"}}, + ) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +def test_skips_changelog_md() -> None: + result = _run( + { + "tool_input": { + "file_path": ".github/CHANGELOG.md", + "content": f"hello {_EM_DASH} world", + }, + }, + ) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +def test_skips_changelog_md_absolute_path() -> None: + result = _run( + { + "tool_input": { + "file_path": "/absolute/.github/CHANGELOG.md", + "content": f"hello {_EM_DASH} world", + }, + }, + ) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +def test_allows_empty_content() -> None: + result = _run({"tool_input": {"file_path": "src/foo.py", "content": ""}}) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +def test_allows_when_no_file_path() -> None: + result = _run({"tool_input": {}}) + assert result.returncode == 0 From d319c4f4286cb0bef2000abc33c7f2e110b8c182 Mon Sep 17 00:00:00 2001 From: Aurelio <19254254+Aureliolo@users.noreply.github.com> Date: Sun, 10 May 2026 12:30:35 +0200 Subject: [PATCH 3/6] fix: address pre-pr-review findings on baseline-edit-protection --- .opencode/plugins/synthorg-hooks.ts | 41 ++++--- .pre-commit-config.yaml | 2 +- CLAUDE.md | 2 +- data/runtime_stats.yaml | 4 +- docs/reference/convention-gates.md | 16 ++- scripts/check_baseline_growth.py | 78 +++++++++--- scripts/check_no_baseline_update.sh | 18 +-- scripts/check_no_em_dashes_hook.sh | 20 +-- .../scripts/test_check_baseline_growth.py | 115 +++++++++++++++++- .../scripts/test_check_no_em_dashes_hook.py | 27 ++++ 10 files changed, 260 insertions(+), 63 deletions(-) diff --git a/.opencode/plugins/synthorg-hooks.ts b/.opencode/plugins/synthorg-hooks.ts index c836c20be4..aa8140a885 100644 --- a/.opencode/plugins/synthorg-hooks.ts +++ b/.opencode/plugins/synthorg-hooks.ts @@ -195,16 +195,19 @@ export const SynthOrgHooks: Plugin = async ({ client, $, app }) => { } } - const payload = { tool_input: { file_path: filePath } } as Record; + const filePathInput = { file_path: filePath } as Record; + // Order must match `.claude/settings.json` PreToolUse Edit|Write: + // migration, baseline, em-dash (richer payload), triage-gate. + // The em-dash hook runs between baseline and triage-gate to enforce + // content-correctness before the workflow-lock fires. for (const script of [ "scripts/check_no_edit_migration.sh", "scripts/check_no_edit_baseline.sh", - "scripts/check_pre_pr_review_triage_gate.sh", ]) { const outcome = runHookScript( script, - payload.tool_input as Record, + filePathInput, 5000, ); const denyReason = denyReasonFromOutcome(outcome); @@ -214,18 +217,16 @@ export const SynthOrgHooks: Plugin = async ({ client, $, app }) => { } // check_no_em_dashes_hook.sh: inspects the candidate content - // (`.tool_input.content` for Write, `.tool_input.new_string` for - // Edit) so the em-dash never lands on disk. Mirrors the - // pre-commit gate in scripts/check_no_em_dashes.py. + // before it lands on disk (mirrors scripts/check_no_em_dashes.py). + const args = (output.args ?? {}) as Record; + const emDashInput = { ...filePathInput } as Record; + if (typeof args.content === "string") { + emDashInput.content = args.content; + } + if (typeof args.new_string === "string") { + emDashInput.new_string = args.new_string; + } { - const args = (output.args ?? {}) as Record; - const emDashInput: Record = { file_path: filePath }; - if (typeof args.content === "string") { - emDashInput.content = args.content; - } - if (typeof args.new_string === "string") { - emDashInput.new_string = args.new_string; - } const outcome = runHookScript( "scripts/check_no_em_dashes_hook.sh", emDashInput, @@ -236,6 +237,18 @@ export const SynthOrgHooks: Plugin = async ({ client, $, app }) => { throw new Error(denyReason); } } + + { + const outcome = runHookScript( + "scripts/check_pre_pr_review_triage_gate.sh", + filePathInput, + 5000, + ); + const denyReason = denyReasonFromOutcome(outcome); + if (denyReason) { + throw new Error(denyReason); + } + } } // Only the remaining bash / shell checks apply below diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b4ddf712d8..fb84b9962b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -220,7 +220,7 @@ repos: entry: uv args: [run, python, scripts/check_baseline_growth.py] language: system - files: ^scripts/(_?[a-z_]+_baseline\.(txt|json|py))$ + files: ^scripts/(([a-z][a-z_]*_baseline\.(txt|json))|(_[a-z][a-z_]*_baseline\.py))$ - id: no-release-please-token name: forbid new RELEASE_PLEASE_TOKEN references diff --git a/CLAUDE.md b/CLAUDE.md index 4ff6b6403d..6c214bc599 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -15,7 +15,7 @@ Web: see `web/CLAUDE.md`. CLI: see `cli/CLAUDE.md` (use `go -C cli`, never `cd c - **Configuration Precedence (MANDATORY)**: DB > env > YAML > code default via `SettingsService`/`ConfigResolver`; no `os.environ.get` outside startup. See [docs/reference/configuration-precedence.md](docs/reference/configuration-precedence.md). - **No Hardcoded Values (MANDATORY)**: numerics live in `settings/definitions/`; allowlist 0/1/-1, HTTP codes, hex masks, powers-of-2. Enforced by `scripts/check_no_magic_numbers.py`. - **Doc Numeric Claims (MANDATORY)**: numerics in README + public docs sourced from `data/runtime_stats.yaml` via `` markers. See `data/README.md`. -- **Test Regression (MANDATORY)**: timeout/slow failures = source-code regression; never edit `tests/baselines/unit_timing.json`. +- **Test Regression (MANDATORY)**: timeout/slow failures = source-code regression; never edit `tests/baselines/unit_timing.json` or any `scripts/*_baseline.{txt,json}` / `scripts/_*_baseline.py`. Both families are PreToolUse-blocked. Per-invocation bypass for gate baselines: `ALLOW_BASELINE_GROWTH=1 git commit ...` (requires explicit user approval). - **Post-Implementation + Pre-PR Review (MANDATORY)**: after issue: branch + commit + push (no auto-PR); use `/pre-pr-review` (gh pr create is hookify-blocked). After PR: `/aurelio-review-pr` for external feedback. Fix EVERYTHING valid; no deferring. ## Quick Commands diff --git a/data/runtime_stats.yaml b/data/runtime_stats.yaml index cfc5c14e27..9cc22343dd 100644 --- a/data/runtime_stats.yaml +++ b/data/runtime_stats.yaml @@ -43,8 +43,8 @@ stats: raw: 7 display: "7" convention_gates: - raw: 37 - display: "37" + raw: 38 + display: "38" sources: tests: "uv run python -m pytest --collect-only -q" diff --git a/docs/reference/convention-gates.md b/docs/reference/convention-gates.md index 1c33058f2b..99a2358816 100644 --- a/docs/reference/convention-gates.md +++ b/docs/reference/convention-gates.md @@ -11,6 +11,7 @@ The gate's job is to catch the SECOND occurrence of the category; the audit's jo All under `scripts/`. The list is generated by `ls scripts/check_*.py`; if an entry below disappears or a new check script lands, update this page in the same PR (and the meta-gate enforces this for the canonical doc set). - `check_backend_regional_defaults.py` +- `check_baseline_growth.py` - `check_boundary_typed.py` - `check_currency_aggregation_invariant.py` - `check_dead_api_endpoints.py` @@ -47,7 +48,20 @@ All under `scripts/`. The list is generated by `ls scripts/check_*.py`; if an en - `check_workflow_shell_git_commits.py` - `check_workflow_tag_lifecycle.py` -(37 total `check_*.py` scripts: enforcement gates plus the meta-gate below.) +(38 total `check_*.py` scripts: enforcement gates plus the meta-gate below.) + +## PreToolUse hooks (Claude Code + OpenCode) + +Some conventions are also enforced *before* the file lands on disk so the offending content never reaches the diff. Bash scripts under `scripts/` registered in `.claude/settings.json` and `.opencode/plugins/synthorg-hooks.ts`: + +- `check_no_edit_baseline.sh`: blocks `Edit` / `Write` on `tests/baselines/*.json`, `scripts/*_baseline.{txt,json}`, and `scripts/_*_baseline.py`. +- `check_no_baseline_update.sh`: blocks `Bash` invocations of `scripts/check_*.py --update-baseline` / `--update` / `--refresh-baseline`. +- `check_no_em_dashes_hook.sh`: blocks `Edit` / `Write` whose candidate content contains a U+2014 em-dash or one of its HTML entities. Mirrors the diff-time `check_no_em_dashes.py` pre-commit gate. +- `check_no_edit_migration.sh`: blocks `Edit` / `Write` on `src/synthorg/persistence/{sqlite,postgres}/revisions/*.sql` (use `atlas migrate diff` instead). +- `check_no_atlas_rehash.sh`: blocks `Bash` invocations of `atlas migrate hash` (rehashing breaks installed databases). +- `check_pre_pr_review_triage_gate.sh`: blocks `Edit` / `Write` outside `_audit/` while a `/pre-pr-review` triage table is pending user approval. + +The hook layer is fail-closed: the OpenCode plugin treats hook execution errors as denials, so a misbehaving hook script blocks the action rather than letting it through. ## Registration procedure diff --git a/scripts/check_baseline_growth.py b/scripts/check_baseline_growth.py index 26f73610f8..a9a833c4d4 100644 --- a/scripts/check_baseline_growth.py +++ b/scripts/check_baseline_growth.py @@ -24,6 +24,11 @@ REPO_ROOT = Path(__file__).resolve().parent.parent EXIT_OK = 0 EXIT_GROWTH_DETECTED = 1 +EXIT_INVALID_BASELINE = 2 + + +class InvalidBaselineError(Exception): + """Raised when a staged baseline file cannot be parsed.""" def _is_baseline_path(path: str) -> bool: @@ -39,10 +44,17 @@ def _is_baseline_path(path: str) -> bool: def _count_json_entries(text: str) -> int: + """Count entries in a JSON baseline; raise ``InvalidBaselineError`` on parse failure. + + A corrupt baseline must block the commit, never silently pass. The previous + sentinel return of -1 was less-than every non-negative ``head_count``, which + let malformed baselines slip through the ``staged > head`` comparison. + """ try: payload = json.loads(text) - except json.JSONDecodeError: - return -1 + except json.JSONDecodeError as exc: + msg = f"baseline JSON failed to parse: {exc.msg} at line {exc.lineno}" + raise InvalidBaselineError(msg) from exc if isinstance(payload, dict): locations = payload.get("locations") if isinstance(locations, dict): @@ -71,7 +83,13 @@ def _staged_entries(text: str, suffix: str) -> int: def _read_head(path: str) -> str | None: - """Return the file at ``HEAD`` or ``None`` if it did not exist there.""" + """Return the file at ``HEAD`` or ``None`` if it did not exist there. + + ``FileNotFoundError`` from a missing ``git`` binary is treated the same as + "no HEAD content", matching the not-yet-committed-baseline path. Any other + OS error is surfaced via stderr so a broken environment is visible rather + than silently letting growth through. + """ try: result = subprocess.run( ["git", "show", f"HEAD:{path}"], @@ -79,7 +97,10 @@ def _read_head(path: str) -> str | None: capture_output=True, text=True, ) - except OSError: + except FileNotFoundError: + return None + except OSError as exc: + print(f"WARNING: git show failed for {path}: {exc}", file=sys.stderr) return None if result.returncode != 0: return None @@ -94,6 +115,34 @@ def _classify(path: str) -> str: return ".txt" +def _inspect_path( + path: str, + grown: list[tuple[str, int, int]], + invalid: list[tuple[str, str]], +) -> None: + """Compare one staged baseline against HEAD; record growth or parse failure.""" + suffix = _classify(path) + absolute = REPO_ROOT / path + try: + staged_text = absolute.read_text(encoding="utf-8") + except OSError: + return + try: + staged_count = _staged_entries(staged_text, suffix) + except InvalidBaselineError as exc: + invalid.append((path, str(exc))) + return + head_text = _read_head(path) + head_count = 0 + if head_text is not None: + try: + head_count = _staged_entries(head_text, suffix) + except InvalidBaselineError: + head_count = 0 + if staged_count > head_count: + grown.append((path, head_count, staged_count)) + + def main(argv: list[str]) -> int: """Compare staged baseline files against HEAD; reject growth.""" if os.environ.get("ALLOW_BASELINE_GROWTH") == "1": @@ -102,18 +151,17 @@ def main(argv: list[str]) -> int: if not paths: return EXIT_OK grown: list[tuple[str, int, int]] = [] + invalid: list[tuple[str, str]] = [] for path in paths: - suffix = _classify(path) - absolute = REPO_ROOT / path - try: - staged_text = absolute.read_text(encoding="utf-8") - except OSError: - continue - head_text = _read_head(path) - head_count = _staged_entries(head_text, suffix) if head_text is not None else 0 - staged_count = _staged_entries(staged_text, suffix) - if staged_count > head_count: - grown.append((path, head_count, staged_count)) + _inspect_path(path, grown, invalid) + if invalid: + print( + "Gate suppression baselines failed to parse:", + file=sys.stderr, + ) + for path, reason in invalid: + print(f" {path}: {reason}", file=sys.stderr) + return EXIT_INVALID_BASELINE if not grown: return EXIT_OK print( diff --git a/scripts/check_no_baseline_update.sh b/scripts/check_no_baseline_update.sh index fa88b14df2..47447c7834 100755 --- a/scripts/check_no_baseline_update.sh +++ b/scripts/check_no_baseline_update.sh @@ -10,15 +10,13 @@ # decides a regeneration is warranted, they can run the command themselves # in a `! `-prefixed bash invocation, or temporarily disable this hook. # -# Exit behavior: +# Exit behaviour: # - Non-update-baseline commands: exit 0 (allow) # - --update-baseline / --update on a check_*.py: print JSON, exit 2 set -euo pipefail -if ! COMMAND=$(jq -r '.tool_input.command // ""' 2>/dev/null); then - exit 0 -fi +COMMAND=$(jq -r '.tool_input.command // ""' 2>/dev/null || true) if [[ -z "$COMMAND" ]]; then exit 0 fi @@ -32,15 +30,9 @@ fi if [[ "$COMMAND" =~ scripts/check_[a-z_]+\.py.*(--update-baseline|--refresh-baseline) ]] || \ [[ "$COMMAND" =~ scripts/check_[a-z_]+\.py.*--update($|[[:space:]]) ]]; then REASON="Gate baseline regeneration is not autonomously allowed. Adding a new exception requires explicit user approval. Fix the source instead, or ask the user before running --update-baseline / --update / --refresh-baseline." - cat </dev/null || true) if [[ -z "$CONTENT" ]]; then exit 0 @@ -55,15 +53,9 @@ HTML_HEX="${AMP}#x2014;" if grep -qF -e "$EM_DASH" -e "$HTML_NAMED" -e "$HTML_DEC" -e "$HTML_HEX" <<<"$CONTENT"; then REASON="Em-dash (U+2014) detected in the content being written. Replace with the ASCII punctuation that matches the sentence: ':' (colon), ';' (semicolon), ',' (comma), '.' (period), '( ... )' (parens), or '-' (hyphen, compound only). Bare '--' is almost never right; prefer one of the above or rewrite." - cat < None: assert _MODULE._count_json_entries(text) == 4 -def test_count_json_entries_invalid_returns_negative() -> None: - assert _MODULE._count_json_entries("not json") == -1 +def test_count_json_entries_invalid_raises() -> None: + with pytest.raises(_MODULE.InvalidBaselineError): + _MODULE._count_json_entries("not json") def test_count_json_entries_no_locations_dict() -> None: @@ -215,3 +216,113 @@ def fake_run( monkeypatch.setattr(_MODULE.subprocess, "run", fake_run) assert _MODULE._read_head("scripts/does_not_exist.txt") is None + + +def test_read_head_returns_none_when_git_binary_missing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + def fake_run( + cmd: list[str], + **kwargs: object, + ) -> subprocess.CompletedProcess[str]: + del cmd, kwargs + msg = "git binary not on PATH" + raise FileNotFoundError(msg) + + monkeypatch.setattr(_MODULE.subprocess, "run", fake_run) + assert _MODULE._read_head("scripts/does_not_exist.txt") is None + + +def test_read_head_warns_on_unexpected_oserror( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + def fake_run( + cmd: list[str], + **kwargs: object, + ) -> subprocess.CompletedProcess[str]: + del cmd, kwargs + msg = "git denied" + raise PermissionError(msg) + + monkeypatch.setattr(_MODULE.subprocess, "run", fake_run) + assert _MODULE._read_head("scripts/some_baseline.txt") is None + captured = capsys.readouterr() + assert "git show failed" in captured.err + assert "some_baseline.txt" in captured.err + + +def test_main_blocks_invalid_json_baseline( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + baseline = tmp_path / "scripts" / "_fake_baseline.json" + baseline.parent.mkdir(parents=True) + baseline.write_text("{ this is not valid json ", encoding="utf-8") + monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) + monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) + with patch.object(_MODULE, "_read_head", return_value="{}"): + rc: int = _MODULE.main( + ["check_baseline_growth.py", "scripts/_fake_baseline.json"], + ) + assert rc == _MODULE.EXIT_INVALID_BASELINE + captured = capsys.readouterr() + assert "_fake_baseline.json" in captured.err + assert "failed to parse" in captured.err + + +def test_main_skips_unreadable_staged_file( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """A staged file that disappears between staging and the gate is silently skipped. + + Pre-commit cannot reach this case in practice because the file is staged, + but a hostile filesystem (anti-virus quarantine, race with cleanup) could + surface OSError. Skipping silently matches the intent: the gate has nothing + to compare against. + """ + monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) + monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) + rc: int = _MODULE.main( + ["check_baseline_growth.py", "scripts/missing_baseline.txt"], + ) + assert rc == 0 + + +def test_main_handles_multiple_paths_mixed_states( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + grew = tmp_path / "scripts" / "grew_baseline.txt" + shrank = tmp_path / "scripts" / "shrank_baseline.txt" + grew.parent.mkdir(parents=True) + grew.write_text("a\nb\nc\nd\n", encoding="utf-8") + shrank.write_text("a\n", encoding="utf-8") + monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) + monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) + head_lookup = { + "scripts/grew_baseline.txt": "a\nb\n", + "scripts/shrank_baseline.txt": "a\nb\nc\n", + "scripts/missing_baseline.txt": None, + } + + def fake_read_head(path: str) -> str | None: + return head_lookup[path] + + with patch.object(_MODULE, "_read_head", side_effect=fake_read_head): + rc: int = _MODULE.main( + [ + "check_baseline_growth.py", + "scripts/grew_baseline.txt", + "scripts/shrank_baseline.txt", + "scripts/missing_baseline.txt", + ], + ) + assert rc == 1 + captured = capsys.readouterr() + assert "grew_baseline.txt" in captured.err + assert "shrank_baseline.txt" not in captured.err + assert "missing_baseline.txt" not in captured.err diff --git a/tests/unit/scripts/test_check_no_em_dashes_hook.py b/tests/unit/scripts/test_check_no_em_dashes_hook.py index d75ca10918..57a0657b38 100644 --- a/tests/unit/scripts/test_check_no_em_dashes_hook.py +++ b/tests/unit/scripts/test_check_no_em_dashes_hook.py @@ -136,3 +136,30 @@ def test_allows_empty_content() -> None: def test_allows_when_no_file_path() -> None: result = _run({"tool_input": {}}) assert result.returncode == 0 + + +@_BASH_AVAILABLE +def test_allows_when_tool_input_missing() -> None: + result = _run({}) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +def test_allows_malformed_json_envelope() -> None: + """Malformed stdin must not crash the hook; falls through to allow. + + Choosing fail-closed (block) here would brick Edit/Write across the entire + session if jq itself misbehaved or the harness sent a non-JSON envelope. + Allowing through is the deliberate posture: the pre-commit gate + `scripts/check_no_em_dashes.py` is the diff-time backstop. + """ + assert _BASH is not None + result = subprocess.run( # noqa: S603 + [_BASH, str(_SCRIPT)], + input="this is not json at all { ", + capture_output=True, + text=True, + check=False, + encoding="utf-8", + ) + assert result.returncode == 0 From 35ed842c54808170f578d48c0a48987d95be9d6b Mon Sep 17 00:00:00 2001 From: Aurelio <19254254+Aureliolo@users.noreply.github.com> Date: Sun, 10 May 2026 13:02:41 +0200 Subject: [PATCH 4/6] fix: babysit round 2, 4 findings (3 reviewer + 1 codeql) - scripts/check_baseline_growth.py:_count_json_entries fallback to len(payload) when 'locations' key is missing, so flat-dict baselines surface growth instead of returning sentinel 0 (gemini) - scripts/check_baseline_growth.py: validate resolved baseline path stays under REPO_ROOT before reading; closes CodeQL alert #285 (py/path-injection) - .pre-commit-config.yaml: hook regex now accepts underscore-prefixed JSON/TXT baselines like scripts/_workflow_shell_git_commits_baseline.json (CodeRabbit) - scripts/check_no_em_dashes_hook.sh: normalise backslashes before changelog exemption case-match so Windows paths hit the same skip path (CodeRabbit) - Tests: update json fallback expectation, add path-traversal guard test, add Windows-path changelog exemption parametrized cases --- .pre-commit-config.yaml | 2 +- scripts/check_baseline_growth.py | 28 ++++++++++++++-- scripts/check_no_em_dashes_hook.sh | 5 ++- .../scripts/test_check_baseline_growth.py | 33 +++++++++++++++++-- .../scripts/test_check_no_em_dashes_hook.py | 28 ++++++++++++++++ 5 files changed, 90 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fb84b9962b..fd5ed3c3a1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -220,7 +220,7 @@ repos: entry: uv args: [run, python, scripts/check_baseline_growth.py] language: system - files: ^scripts/(([a-z][a-z_]*_baseline\.(txt|json))|(_[a-z][a-z_]*_baseline\.py))$ + files: ^scripts/(([a-z_][a-z_]*_baseline\.(txt|json))|(_[a-z][a-z_]*_baseline\.py))$ - id: no-release-please-token name: forbid new RELEASE_PLEASE_TOKEN references diff --git a/scripts/check_baseline_growth.py b/scripts/check_baseline_growth.py index a9a833c4d4..8d62729de4 100644 --- a/scripts/check_baseline_growth.py +++ b/scripts/check_baseline_growth.py @@ -49,6 +49,11 @@ def _count_json_entries(text: str) -> int: A corrupt baseline must block the commit, never silently pass. The previous sentinel return of -1 was less-than every non-negative ``head_count``, which let malformed baselines slip through the ``staged > head`` comparison. + + For dict payloads, prefer the ``locations`` key (the canonical shape used + by gate baselines). When ``locations`` is missing or non-collection, fall + back to counting top-level keys so a flat-dict baseline format still + surfaces growth instead of silently returning 0. """ try: payload = json.loads(text) @@ -61,13 +66,14 @@ def _count_json_entries(text: str) -> int: return len(locations) if isinstance(locations, list): return len(locations) - return 0 + return len(payload) if isinstance(payload, list): return len(payload) return 0 def _count_text_entries(text: str) -> int: + """Count non-blank, non-comment lines in a text baseline.""" return sum( 1 for line in text.splitlines() @@ -108,6 +114,7 @@ def _read_head(path: str) -> str | None: def _classify(path: str) -> str: + """Return the file-suffix tag (``.json`` / ``.py`` / ``.txt``) for a baseline path.""" if path.endswith(".json"): return ".json" if path.endswith(".py"): @@ -115,6 +122,21 @@ def _classify(path: str) -> str: return ".txt" +def _safe_baseline_path(path: str) -> Path | None: + """Resolve ``path`` against ``REPO_ROOT`` and confirm it stays inside the repo. + + Defends against path-injection: pre-commit feeds this gate a list of staged + paths from argv. ``_is_baseline_path`` already restricts the shape, but a + resolved path that escapes ``REPO_ROOT`` (symlinks, exotic basenames) must + still be rejected before any filesystem read. + """ + repo_root = REPO_ROOT.resolve() + candidate = (repo_root / path).resolve() + if candidate == repo_root or not candidate.is_relative_to(repo_root): + return None + return candidate + + def _inspect_path( path: str, grown: list[tuple[str, int, int]], @@ -122,7 +144,9 @@ def _inspect_path( ) -> None: """Compare one staged baseline against HEAD; record growth or parse failure.""" suffix = _classify(path) - absolute = REPO_ROOT / path + absolute = _safe_baseline_path(path) + if absolute is None: + return try: staged_text = absolute.read_text(encoding="utf-8") except OSError: diff --git a/scripts/check_no_em_dashes_hook.sh b/scripts/check_no_em_dashes_hook.sh index 696aab0883..db1f4e97a9 100755 --- a/scripts/check_no_em_dashes_hook.sh +++ b/scripts/check_no_em_dashes_hook.sh @@ -33,7 +33,10 @@ if [[ -z "$FILE_PATH" ]]; then fi # Skip regenerated changelog (release-please rewrites this file from git history). -case "$FILE_PATH" in +# Normalise Windows-style backslashes to forward slashes so the case patterns +# below match `C:\repo\.github\CHANGELOG.md` the same as the POSIX form. +NORMALISED_FILE_PATH=${FILE_PATH//\\//} +case "$NORMALISED_FILE_PATH" in */.github/CHANGELOG.md|.github/CHANGELOG.md) exit 0 ;; diff --git a/tests/unit/scripts/test_check_baseline_growth.py b/tests/unit/scripts/test_check_baseline_growth.py index 5891edefce..34a5625ff9 100644 --- a/tests/unit/scripts/test_check_baseline_growth.py +++ b/tests/unit/scripts/test_check_baseline_growth.py @@ -95,8 +95,18 @@ def test_count_json_entries_invalid_raises() -> None: _MODULE._count_json_entries("not json") -def test_count_json_entries_no_locations_dict() -> None: - assert _MODULE._count_json_entries(json.dumps({"foo": "bar"})) == 0 +def test_count_json_entries_no_locations_dict_falls_back_to_top_level_keys() -> None: + """A flat-dict baseline (no ``locations`` key) falls back to top-level keys. + + The earlier sentinel of 0 created a loophole: any flat-dict format would + return 0 for both staged and HEAD, so growth (``staged > head``) was + never detected. Counting top-level keys instead lets the gate catch + additions even on unconventional baseline shapes. + """ + assert _MODULE._count_json_entries(json.dumps({"foo": "bar"})) == 1 + assert ( + _MODULE._count_json_entries(json.dumps({"a": 1, "b": 2, "c": 3, "d": 4})) == 4 + ) # ── classification ────────────────────────────────────────────── @@ -291,6 +301,25 @@ def test_main_skips_unreadable_staged_file( assert rc == 0 +def test_safe_baseline_path_rejects_repo_escape( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """``_safe_baseline_path`` rejects paths that resolve outside REPO_ROOT. + + Defends against path-injection from untrusted argv: even if a baseline-shaped + string slips through ``_is_baseline_path``, the resolved location must remain + inside the repository before the gate touches the filesystem. + """ + monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) + assert _MODULE._safe_baseline_path("../escape_baseline.txt") is None + assert _MODULE._safe_baseline_path("scripts/../../escape_baseline.txt") is None + assert ( + _MODULE._safe_baseline_path("scripts/legit_baseline.txt") + == (tmp_path / "scripts" / "legit_baseline.txt").resolve() + ) + + def test_main_handles_multiple_paths_mixed_states( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/unit/scripts/test_check_no_em_dashes_hook.py b/tests/unit/scripts/test_check_no_em_dashes_hook.py index 57a0657b38..c2953a9323 100644 --- a/tests/unit/scripts/test_check_no_em_dashes_hook.py +++ b/tests/unit/scripts/test_check_no_em_dashes_hook.py @@ -126,6 +126,34 @@ def test_skips_changelog_md_absolute_path() -> None: assert result.returncode == 0 +@_BASH_AVAILABLE +@pytest.mark.parametrize( + "windows_path", + [ + r"C:\repo\.github\CHANGELOG.md", + r".github\CHANGELOG.md", + r"D:\projects\synthorg\.github\CHANGELOG.md", + ], +) +def test_skips_changelog_md_windows_path(windows_path: str) -> None: + """Windows-style backslash paths must hit the same exemption as POSIX. + + Without normalisation, the case pattern only matches ``/``-separated paths + and a Windows path like ``C:\\repo\\.github\\CHANGELOG.md`` would be blocked + even though changelog writes are exempt. The hook normalises backslashes + to forward slashes before the case match. + """ + result = _run( + { + "tool_input": { + "file_path": windows_path, + "content": f"hello {_EM_DASH} world", + }, + }, + ) + assert result.returncode == 0 + + @_BASH_AVAILABLE def test_allows_empty_content() -> None: result = _run({"tool_input": {"file_path": "src/foo.py", "content": ""}}) From 55403bab69db7e2c0f9971f8d61ea58d31e7d5fd Mon Sep 17 00:00:00 2001 From: Aurelio <19254254+Aureliolo@users.noreply.github.com> Date: Sun, 10 May 2026 13:27:59 +0200 Subject: [PATCH 5/6] fix: babysit round 3, 4 findings (2 coderabbit + 2 codeql) - scripts/check_baseline_growth.py: replace _safe_baseline_path resolve()/is_relative_to() with regex-validated basename approach so the user-supplied directory is discarded and only a known-safe REPO_ROOT/scripts/ path is constructed; closes CodeQL alerts #285 (line 151) and #286 (line 134) - scripts/check_baseline_growth.py: emit stderr WARNING when InvalidBaselineError is raised parsing HEAD content instead of silently falling back to head_count=0 (coderabbit) - scripts/check_no_edit_baseline.sh: normalise backslashes before case-match so Windows paths like C:\repo\scripts\foo_baseline.txt hit the same protection as POSIX (coderabbit) - Tests: rewrite path-injection test for regex sanitisation, add corrupt-HEAD warning test, new tests/unit/scripts/test_check_no_edit_baseline.py covering POSIX + Windows-path + non-baseline + malformed-envelope cases --- scripts/check_baseline_growth.py | 35 +++-- scripts/check_no_edit_baseline.sh | 6 +- .../scripts/test_check_baseline_growth.py | 74 +++++++-- .../scripts/test_check_no_edit_baseline.py | 141 ++++++++++++++++++ 4 files changed, 235 insertions(+), 21 deletions(-) create mode 100644 tests/unit/scripts/test_check_no_edit_baseline.py diff --git a/scripts/check_baseline_growth.py b/scripts/check_baseline_growth.py index 8d62729de4..6427464124 100644 --- a/scripts/check_baseline_growth.py +++ b/scripts/check_baseline_growth.py @@ -17,6 +17,7 @@ import json import os +import re import subprocess import sys from pathlib import Path @@ -26,6 +27,9 @@ EXIT_GROWTH_DETECTED = 1 EXIT_INVALID_BASELINE = 2 +_SCRIPTS_DIRNAME = "scripts" +_BASELINE_BASENAME_RE = re.compile(r"^_?[a-z][a-z_]*_baseline\.(?:txt|json|py)$") + class InvalidBaselineError(Exception): """Raised when a staged baseline file cannot be parsed.""" @@ -123,18 +127,25 @@ def _classify(path: str) -> str: def _safe_baseline_path(path: str) -> Path | None: - """Resolve ``path`` against ``REPO_ROOT`` and confirm it stays inside the repo. + """Convert a staged baseline path into an absolute ``Path`` inside ``REPO_ROOT``. Defends against path-injection: pre-commit feeds this gate a list of staged - paths from argv. ``_is_baseline_path`` already restricts the shape, but a - resolved path that escapes ``REPO_ROOT`` (symlinks, exotic basenames) must - still be rejected before any filesystem read. + paths from argv. Rather than passing the user-controlled string into + ``Path.resolve()`` and hoping the resolved location stays inside the repo, + extract the basename, validate it against a strict regex, and construct the + filesystem path from a hardcoded directory plus the validated basename. The + user-supplied directory portion is discarded; only the regex-clean basename + is joined to ``REPO_ROOT / "scripts"``. Returns ``None`` for any input that + does not match the canonical ``scripts/`` shape. """ - repo_root = REPO_ROOT.resolve() - candidate = (repo_root / path).resolve() - if candidate == repo_root or not candidate.is_relative_to(repo_root): + if not path.startswith(f"{_SCRIPTS_DIRNAME}/"): + return None + basename = path[len(_SCRIPTS_DIRNAME) + 1 :] + if "/" in basename or "\\" in basename: + return None + if not _BASELINE_BASENAME_RE.fullmatch(basename): return None - return candidate + return REPO_ROOT / _SCRIPTS_DIRNAME / basename def _inspect_path( @@ -161,7 +172,13 @@ def _inspect_path( if head_text is not None: try: head_count = _staged_entries(head_text, suffix) - except InvalidBaselineError: + except InvalidBaselineError as exc: + print( + f"WARNING: HEAD baseline {path} failed to parse ({exc}); " + "falling back to head_count=0. The growth check may be " + "over-strict for this file until HEAD is repaired.", + file=sys.stderr, + ) head_count = 0 if staged_count > head_count: grown.append((path, head_count, staged_count)) diff --git a/scripts/check_no_edit_baseline.sh b/scripts/check_no_edit_baseline.sh index 4169ad50a5..d6b9581b1a 100644 --- a/scripts/check_no_edit_baseline.sh +++ b/scripts/check_no_edit_baseline.sh @@ -23,9 +23,13 @@ if [[ -z "$FILE_PATH" ]]; then exit 0 fi +# Normalise Windows-style backslashes to forward slashes so the case patterns +# below match `C:\repo\scripts\foo_baseline.txt` the same as the POSIX form. +NORMALISED_FILE_PATH=${FILE_PATH//\\//} + REASON="" -case "$FILE_PATH" in +case "$NORMALISED_FILE_PATH" in */tests/baselines/*.json|tests/baselines/*.json) REASON="Test timing baselines require explicit user approval to modify. Do not bump baselines to bypass regression guards -- fix the source code or tests instead." ;; diff --git a/tests/unit/scripts/test_check_baseline_growth.py b/tests/unit/scripts/test_check_baseline_growth.py index 34a5625ff9..c219224ebd 100644 --- a/tests/unit/scripts/test_check_baseline_growth.py +++ b/tests/unit/scripts/test_check_baseline_growth.py @@ -301,23 +301,75 @@ def test_main_skips_unreadable_staged_file( assert rc == 0 -def test_safe_baseline_path_rejects_repo_escape( +def test_safe_baseline_path_rejects_path_injection( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - """``_safe_baseline_path`` rejects paths that resolve outside REPO_ROOT. + """``_safe_baseline_path`` rejects any input outside the canonical shape. - Defends against path-injection from untrusted argv: even if a baseline-shaped - string slips through ``_is_baseline_path``, the resolved location must remain - inside the repository before the gate touches the filesystem. + Defends against path-injection from untrusted argv. The implementation + validates the basename against a strict regex and joins it to a hardcoded + ``REPO_ROOT / "scripts"``, so even a baseline-shaped string that slips + through ``_is_baseline_path`` cannot redirect the filesystem read. """ monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) - assert _MODULE._safe_baseline_path("../escape_baseline.txt") is None - assert _MODULE._safe_baseline_path("scripts/../../escape_baseline.txt") is None - assert ( - _MODULE._safe_baseline_path("scripts/legit_baseline.txt") - == (tmp_path / "scripts" / "legit_baseline.txt").resolve() - ) + rejected = [ + "../escape_baseline.txt", + "scripts/../../escape_baseline.txt", + r"scripts/..\..\escape_baseline.txt", + "scripts/subdir/legit_baseline.txt", + "scripts/legit_baseline.exe", + "scripts/Capitalised_Baseline.txt", + "scripts/legit_baseline", + "scripts/legit-baseline.txt", + ] + for path in rejected: + assert _MODULE._safe_baseline_path(path) is None, path + accepted = { + "scripts/legit_baseline.txt": "legit_baseline.txt", + "scripts/_workflow_shell_git_commits_baseline.json": ( + "_workflow_shell_git_commits_baseline.json" + ), + "scripts/_schema_drift_baseline.py": "_schema_drift_baseline.py", + } + for path, basename in accepted.items(): + assert _MODULE._safe_baseline_path(path) == tmp_path / "scripts" / basename, ( + path + ) + + +def test_inspect_path_warns_on_corrupt_head_baseline( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + """A corrupt HEAD baseline emits a stderr warning before falling back to 0. + + Silent fallback masked HEAD corruption; the warning makes the over-strict + growth check visible without changing the safe behaviour (head_count=0 + still rejects any non-empty staged baseline as growth). + """ + baseline = tmp_path / "scripts" / "_corrupt_head_baseline.json" + baseline.parent.mkdir(parents=True) + baseline.write_text(json.dumps({"locations": {"a": 1}}), encoding="utf-8") + monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) + monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) + with patch.object( + _MODULE, + "_read_head", + return_value="{ this is not valid json ", + ): + rc: int = _MODULE.main( + [ + "check_baseline_growth.py", + "scripts/_corrupt_head_baseline.json", + ], + ) + assert rc == _MODULE.EXIT_GROWTH_DETECTED + captured = capsys.readouterr() + assert "WARNING: HEAD baseline" in captured.err + assert "_corrupt_head_baseline.json" in captured.err + assert "failed to parse" in captured.err def test_main_handles_multiple_paths_mixed_states( diff --git a/tests/unit/scripts/test_check_no_edit_baseline.py b/tests/unit/scripts/test_check_no_edit_baseline.py new file mode 100644 index 0000000000..7975e82211 --- /dev/null +++ b/tests/unit/scripts/test_check_no_edit_baseline.py @@ -0,0 +1,141 @@ +"""Unit tests for ``scripts/check_no_edit_baseline.sh``. + +The hook is a Bash PreToolUse intercept that fires before Edit/Write lands +on a protected baseline file. We invoke it as a subprocess feeding mock +JSON envelopes on stdin and assert on exit codes + stdout / stderr. +""" + +import json +import shutil +import subprocess +from pathlib import Path + +import pytest + +pytestmark = pytest.mark.unit + +_REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent +_SCRIPT = _REPO_ROOT / "scripts" / "check_no_edit_baseline.sh" + +_BASH = shutil.which("bash") +_BASH_AVAILABLE = pytest.mark.skipif(_BASH is None, reason="bash not available") + + +def _run(envelope: dict[str, object]) -> subprocess.CompletedProcess[str]: + assert _BASH is not None + return subprocess.run( # noqa: S603 + [_BASH, str(_SCRIPT)], + input=json.dumps(envelope), + capture_output=True, + text=True, + check=False, + encoding="utf-8", + ) + + +@_BASH_AVAILABLE +def test_blocks_edit_of_text_baseline_posix() -> None: + result = _run( + { + "tool_input": { + "file_path": "scripts/mock_spec_baseline.txt", + "new_string": "src/x.py:1:1\n", + }, + }, + ) + assert result.returncode == 2 + + +@_BASH_AVAILABLE +def test_blocks_edit_of_json_baseline_posix() -> None: + result = _run( + { + "tool_input": { + "file_path": "scripts/_workflow_shell_git_commits_baseline.json", + "new_string": "{}", + }, + }, + ) + assert result.returncode == 2 + + +@_BASH_AVAILABLE +def test_blocks_edit_of_test_timing_baseline_posix() -> None: + result = _run( + { + "tool_input": { + "file_path": "tests/baselines/unit_timing.json", + "new_string": "{}", + }, + }, + ) + assert result.returncode == 2 + + +@_BASH_AVAILABLE +def test_allows_non_baseline_path() -> None: + result = _run( + { + "tool_input": { + "file_path": "src/synthorg/foo.py", + "new_string": "x = 1", + }, + }, + ) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +@pytest.mark.parametrize( + "windows_path", + [ + r"C:\repo\scripts\mock_spec_baseline.txt", + r"scripts\mock_spec_baseline.txt", + r"D:\projects\synthorg\scripts\_workflow_shell_git_commits_baseline.json", + r"C:\repo\tests\baselines\unit_timing.json", + ], +) +def test_blocks_edit_of_baseline_via_windows_path(windows_path: str) -> None: + """Windows-style backslash paths must hit the same protection as POSIX. + + Without normalisation, the case patterns only match ``/``-separated paths + and a Windows path like ``C:\\repo\\scripts\\mock_spec_baseline.txt`` + would slip past the gate. The hook normalises backslashes before the + case match. + """ + result = _run( + { + "tool_input": { + "file_path": windows_path, + "new_string": "src/x.py:1:1\n", + }, + }, + ) + assert result.returncode == 2 + + +@_BASH_AVAILABLE +def test_allows_when_no_file_path() -> None: + result = _run({"tool_input": {}}) + assert result.returncode == 0 + + +@_BASH_AVAILABLE +def test_allows_malformed_json_envelope() -> None: + """Malformed stdin must not crash the hook; falls through to allow. + + Choosing fail-closed (block) here would brick Edit/Write across the + entire session if jq itself misbehaved or the harness sent a non-JSON + envelope. Allowing through is the deliberate posture: the in-repo + pre-commit baseline-growth gate is the diff-time backstop. + """ + assert _BASH is not None + result = subprocess.run( # noqa: S603 + [_BASH, str(_SCRIPT)], + input="this is not json at all { ", + capture_output=True, + text=True, + check=False, + encoding="utf-8", + ) + assert result.returncode == 0 From deb0926e53072e15009a9f3dcdcd85cf12533f9b Mon Sep 17 00:00:00 2001 From: Aurelio <19254254+Aureliolo@users.noreply.github.com> Date: Sun, 10 May 2026 13:52:22 +0200 Subject: [PATCH 6/6] fix: babysit round 4, 4 findings (3 coderabbit + 1 codeql) - scripts/check_baseline_growth.py: unify _is_baseline_path and _safe_baseline_path under a single _BASELINE_BASENAME_RE (canonical regex matching txt/json with optional leading underscore + py with required leading underscore); remove _safe_baseline_path entirely so the gate never touches the user-controlled filesystem path (coderabbit) - scripts/check_baseline_growth.py: read staged content via 'git show :' instead of Path.read_text(); index reads reflect what is actually staged and remove the path-injection sink, closing CodeQL alert #285 (coderabbit + codeql) - tests/unit/scripts/test_check_baseline_growth.py: switch tmp_path/REPO_ROOT-monkeypatched fixtures to mocks of _read_staged + _read_head; broaden test_is_baseline_path with traversal/Windows-separator/uppercase/extension cases; add _read_staged subprocess.run tests - tests/unit/scripts/test_check_no_edit_baseline.py: add test_blocks_edit_of_python_baseline_posix and a Windows-path .py case (coderabbit) --- scripts/check_baseline_growth.py | 78 +++-- .../scripts/test_check_baseline_growth.py | 266 +++++++++++------- .../scripts/test_check_no_edit_baseline.py | 15 + 3 files changed, 220 insertions(+), 139 deletions(-) diff --git a/scripts/check_baseline_growth.py b/scripts/check_baseline_growth.py index 6427464124..099be05d26 100644 --- a/scripts/check_baseline_growth.py +++ b/scripts/check_baseline_growth.py @@ -20,15 +20,23 @@ import re import subprocess import sys -from pathlib import Path -REPO_ROOT = Path(__file__).resolve().parent.parent EXIT_OK = 0 EXIT_GROWTH_DETECTED = 1 EXIT_INVALID_BASELINE = 2 _SCRIPTS_DIRNAME = "scripts" -_BASELINE_BASENAME_RE = re.compile(r"^_?[a-z][a-z_]*_baseline\.(?:txt|json|py)$") + +# Single regex governing every "is this a gate-suppression baseline" decision. +# Three legitimate shapes: +# - ``_baseline.txt`` / ``_baseline.json`` (optional leading ``_``) +# - ``__baseline.py`` (leading ``_`` REQUIRED for py-format baselines) +# Everything else is rejected so ``_is_baseline_path`` and any downstream check +# stay consistent: a previous split between an ``endswith()`` filter and a +# regex sanitiser let some shapes pass one check and not the other. +_BASELINE_BASENAME_RE = re.compile( + r"^(?:_?[a-z][a-z_]*_baseline\.(?:txt|json)|_[a-z][a-z_]*_baseline\.py)$" +) class InvalidBaselineError(Exception): @@ -36,15 +44,18 @@ class InvalidBaselineError(Exception): def _is_baseline_path(path: str) -> bool: - """Return ``True`` for paths that this gate should compare against HEAD.""" - if not path.startswith("scripts/"): + """Return ``True`` for paths this gate should compare against HEAD. + + Validates against ``_BASELINE_BASENAME_RE`` so every code path agrees on + which shapes count as a baseline. Rejects nested paths and any basename + containing path separators (POSIX or Windows-style). + """ + if not path.startswith(f"{_SCRIPTS_DIRNAME}/"): return False - name = path[len("scripts/") :] - if "/" in name: + basename = path[len(_SCRIPTS_DIRNAME) + 1 :] + if "/" in basename or "\\" in basename: return False - if name.endswith(("_baseline.txt", "_baseline.json")): - return True - return name.startswith("_") and name.endswith("_baseline.py") + return _BASELINE_BASENAME_RE.fullmatch(basename) is not None def _count_json_entries(text: str) -> int: @@ -126,26 +137,35 @@ def _classify(path: str) -> str: return ".txt" -def _safe_baseline_path(path: str) -> Path | None: - """Convert a staged baseline path into an absolute ``Path`` inside ``REPO_ROOT``. +def _read_staged(path: str) -> str | None: + """Return the staged-blob content for ``path`` from the git index, or ``None``. - Defends against path-injection: pre-commit feeds this gate a list of staged - paths from argv. Rather than passing the user-controlled string into - ``Path.resolve()`` and hoping the resolved location stays inside the repo, - extract the basename, validate it against a strict regex, and construct the - filesystem path from a hardcoded directory plus the validated basename. The - user-supplied directory portion is discarded; only the regex-clean basename - is joined to ``REPO_ROOT / "scripts"``. Returns ``None`` for any input that - does not match the canonical ``scripts/`` shape. + Reads via ``git show :`` so the gate compares what is actually + staged for the commit, not whatever happens to be on disk. A working-tree + read could drift from the index (post-stage edit, file removed after + staging) and let growth slip through. Reading from the index also avoids + any filesystem touch on the user-controlled argv path, closing the + path-injection class entirely. + + Treats a missing ``git`` binary, the path not being staged, and other OS + errors as "no staged content" so the caller skips silently rather than + crashing on infrastructure problems pre-commit cannot reach in practice. """ - if not path.startswith(f"{_SCRIPTS_DIRNAME}/"): + try: + result = subprocess.run( + ["git", "show", f":{path}"], + check=False, + capture_output=True, + text=True, + ) + except FileNotFoundError: return None - basename = path[len(_SCRIPTS_DIRNAME) + 1 :] - if "/" in basename or "\\" in basename: + except OSError as exc: + print(f"WARNING: git show :{path} failed: {exc}", file=sys.stderr) return None - if not _BASELINE_BASENAME_RE.fullmatch(basename): + if result.returncode != 0: return None - return REPO_ROOT / _SCRIPTS_DIRNAME / basename + return result.stdout def _inspect_path( @@ -155,12 +175,8 @@ def _inspect_path( ) -> None: """Compare one staged baseline against HEAD; record growth or parse failure.""" suffix = _classify(path) - absolute = _safe_baseline_path(path) - if absolute is None: - return - try: - staged_text = absolute.read_text(encoding="utf-8") - except OSError: + staged_text = _read_staged(path) + if staged_text is None: return try: staged_count = _staged_entries(staged_text, suffix) diff --git a/tests/unit/scripts/test_check_baseline_growth.py b/tests/unit/scripts/test_check_baseline_growth.py index c219224ebd..999605b1c7 100644 --- a/tests/unit/scripts/test_check_baseline_growth.py +++ b/tests/unit/scripts/test_check_baseline_growth.py @@ -41,16 +41,32 @@ def _load_script_module() -> ModuleType: @pytest.mark.parametrize( ("path", "expected"), [ + # Accepted shapes ("scripts/mock_spec_baseline.txt", True), ("scripts/no_magic_numbers_baseline.txt", True), ("scripts/_workflow_shell_git_commits_baseline.json", True), + ("scripts/legit_baseline.json", True), ("scripts/_schema_drift_baseline.py", True), + # Wrong location + ("tests/baselines/unit_timing.json", False), + ("README.md", False), + # Wrong extension / shape ("scripts/check_mock_spec.py", False), ("scripts/check_no_edit_baseline.sh", False), ("scripts/no_baseline_at_all.txt", False), - ("tests/baselines/unit_timing.json", False), + # py-format baseline must start with leading underscore + ("scripts/legit_baseline.py", False), + # Nested paths and traversal sequences must be rejected ("scripts/subdir/mock_spec_baseline.txt", False), - ("README.md", False), + ("scripts/../escape_baseline.txt", False), + (r"scripts/..\escape_baseline.txt", False), + # Disallowed character classes in basename + ("scripts/Capitalised_Baseline.txt", False), + ("scripts/legit-baseline.txt", False), + ("scripts/legit_baseline.exe", False), + # Anchored at scripts/ specifically + ("../escape_baseline.txt", False), + ("scriptsX/legit_baseline.txt", False), ], ) def test_is_baseline_path(path: str, expected: bool) -> None: @@ -141,22 +157,21 @@ def test_main_bypass_via_env(monkeypatch: pytest.MonkeyPatch) -> None: def test_main_detects_growth( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - baseline = tmp_path / "scripts" / "fake_baseline.txt" - baseline.parent.mkdir(parents=True) - baseline.write_text( - "# header\nsrc/a.py:1:1\nsrc/b.py:2:2\nsrc/c.py:3:3\n", - encoding="utf-8", - ) - monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) - with patch.object( - _MODULE, - "_read_head", - return_value="# header\nsrc/a.py:1:1\n", + with ( + patch.object( + _MODULE, + "_read_staged", + return_value="# header\nsrc/a.py:1:1\nsrc/b.py:2:2\nsrc/c.py:3:3\n", + ), + patch.object( + _MODULE, + "_read_head", + return_value="# header\nsrc/a.py:1:1\n", + ), ): rc: int = _MODULE.main( ["check_baseline_growth.py", "scripts/fake_baseline.txt"], @@ -168,22 +183,15 @@ def test_main_detects_growth( assert "ALLOW_BASELINE_GROWTH=1" in captured.err -def test_main_allows_shrink( - tmp_path: Path, - monkeypatch: pytest.MonkeyPatch, -) -> None: - baseline = tmp_path / "scripts" / "fake_baseline.txt" - baseline.parent.mkdir(parents=True) - baseline.write_text( - "# header\nsrc/a.py:1:1\n", - encoding="utf-8", - ) - monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) +def test_main_allows_shrink(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) - with patch.object( - _MODULE, - "_read_head", - return_value="# header\nsrc/a.py:1:1\nsrc/b.py:2:2\nsrc/c.py:3:3\n", + with ( + patch.object(_MODULE, "_read_staged", return_value="# header\nsrc/a.py:1:1\n"), + patch.object( + _MODULE, + "_read_head", + return_value="# header\nsrc/a.py:1:1\nsrc/b.py:2:2\nsrc/c.py:3:3\n", + ), ): rc: int = _MODULE.main( ["check_baseline_growth.py", "scripts/fake_baseline.txt"], @@ -192,18 +200,12 @@ def test_main_allows_shrink( def test_main_treats_missing_head_as_empty( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - baseline = tmp_path / "scripts" / "new_baseline.txt" - baseline.parent.mkdir(parents=True) - baseline.write_text("src/a.py:1:1\n", encoding="utf-8") - monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) - with patch.object( - _MODULE, - "_read_head", - return_value=None, + with ( + patch.object(_MODULE, "_read_staged", return_value="src/a.py:1:1\n"), + patch.object(_MODULE, "_read_head", return_value=None), ): rc: int = _MODULE.main( ["check_baseline_growth.py", "scripts/new_baseline.txt"], @@ -263,16 +265,18 @@ def fake_run( def test_main_blocks_invalid_json_baseline( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - baseline = tmp_path / "scripts" / "_fake_baseline.json" - baseline.parent.mkdir(parents=True) - baseline.write_text("{ this is not valid json ", encoding="utf-8") - monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) - with patch.object(_MODULE, "_read_head", return_value="{}"): + with ( + patch.object( + _MODULE, + "_read_staged", + return_value="{ this is not valid json ", + ), + patch.object(_MODULE, "_read_head", return_value="{}"), + ): rc: int = _MODULE.main( ["check_baseline_growth.py", "scripts/_fake_baseline.json"], ) @@ -282,64 +286,25 @@ def test_main_blocks_invalid_json_baseline( assert "failed to parse" in captured.err -def test_main_skips_unreadable_staged_file( - tmp_path: Path, +def test_main_skips_when_path_not_in_index( monkeypatch: pytest.MonkeyPatch, ) -> None: - """A staged file that disappears between staging and the gate is silently skipped. + """A baseline path missing from the git index is silently skipped. Pre-commit cannot reach this case in practice because the file is staged, - but a hostile filesystem (anti-virus quarantine, race with cleanup) could - surface OSError. Skipping silently matches the intent: the gate has nothing - to compare against. + but ``git show :`` will return non-zero (and ``_read_staged`` will + return ``None``) for a path that is not in the index. The gate has + nothing to compare against, so it skips. """ - monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) - rc: int = _MODULE.main( - ["check_baseline_growth.py", "scripts/missing_baseline.txt"], - ) - assert rc == 0 - - -def test_safe_baseline_path_rejects_path_injection( - tmp_path: Path, - monkeypatch: pytest.MonkeyPatch, -) -> None: - """``_safe_baseline_path`` rejects any input outside the canonical shape. - - Defends against path-injection from untrusted argv. The implementation - validates the basename against a strict regex and joins it to a hardcoded - ``REPO_ROOT / "scripts"``, so even a baseline-shaped string that slips - through ``_is_baseline_path`` cannot redirect the filesystem read. - """ - monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) - rejected = [ - "../escape_baseline.txt", - "scripts/../../escape_baseline.txt", - r"scripts/..\..\escape_baseline.txt", - "scripts/subdir/legit_baseline.txt", - "scripts/legit_baseline.exe", - "scripts/Capitalised_Baseline.txt", - "scripts/legit_baseline", - "scripts/legit-baseline.txt", - ] - for path in rejected: - assert _MODULE._safe_baseline_path(path) is None, path - accepted = { - "scripts/legit_baseline.txt": "legit_baseline.txt", - "scripts/_workflow_shell_git_commits_baseline.json": ( - "_workflow_shell_git_commits_baseline.json" - ), - "scripts/_schema_drift_baseline.py": "_schema_drift_baseline.py", - } - for path, basename in accepted.items(): - assert _MODULE._safe_baseline_path(path) == tmp_path / "scripts" / basename, ( - path + with patch.object(_MODULE, "_read_staged", return_value=None): + rc: int = _MODULE.main( + ["check_baseline_growth.py", "scripts/missing_baseline.txt"], ) + assert rc == 0 def test_inspect_path_warns_on_corrupt_head_baseline( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: @@ -349,15 +314,18 @@ def test_inspect_path_warns_on_corrupt_head_baseline( growth check visible without changing the safe behaviour (head_count=0 still rejects any non-empty staged baseline as growth). """ - baseline = tmp_path / "scripts" / "_corrupt_head_baseline.json" - baseline.parent.mkdir(parents=True) - baseline.write_text(json.dumps({"locations": {"a": 1}}), encoding="utf-8") - monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) - with patch.object( - _MODULE, - "_read_head", - return_value="{ this is not valid json ", + with ( + patch.object( + _MODULE, + "_read_staged", + return_value=json.dumps({"locations": {"a": 1}}), + ), + patch.object( + _MODULE, + "_read_head", + return_value="{ this is not valid json ", + ), ): rc: int = _MODULE.main( [ @@ -373,27 +341,31 @@ def test_inspect_path_warns_on_corrupt_head_baseline( def test_main_handles_multiple_paths_mixed_states( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - grew = tmp_path / "scripts" / "grew_baseline.txt" - shrank = tmp_path / "scripts" / "shrank_baseline.txt" - grew.parent.mkdir(parents=True) - grew.write_text("a\nb\nc\nd\n", encoding="utf-8") - shrank.write_text("a\n", encoding="utf-8") - monkeypatch.setattr(_MODULE, "REPO_ROOT", tmp_path) monkeypatch.delenv("ALLOW_BASELINE_GROWTH", raising=False) + staged_lookup = { + "scripts/grew_baseline.txt": "a\nb\nc\nd\n", + "scripts/shrank_baseline.txt": "a\n", + "scripts/missing_baseline.txt": None, + } head_lookup = { "scripts/grew_baseline.txt": "a\nb\n", "scripts/shrank_baseline.txt": "a\nb\nc\n", "scripts/missing_baseline.txt": None, } + def fake_read_staged(path: str) -> str | None: + return staged_lookup[path] + def fake_read_head(path: str) -> str | None: return head_lookup[path] - with patch.object(_MODULE, "_read_head", side_effect=fake_read_head): + with ( + patch.object(_MODULE, "_read_staged", side_effect=fake_read_staged), + patch.object(_MODULE, "_read_head", side_effect=fake_read_head), + ): rc: int = _MODULE.main( [ "check_baseline_growth.py", @@ -407,3 +379,81 @@ def fake_read_head(path: str) -> str | None: assert "grew_baseline.txt" in captured.err assert "shrank_baseline.txt" not in captured.err assert "missing_baseline.txt" not in captured.err + + +# ── _read_staged ──────────────────────────────────────────────── + + +def test_read_staged_returns_none_for_path_not_in_index( + monkeypatch: pytest.MonkeyPatch, +) -> None: + def fake_run( + cmd: list[str], + **kwargs: object, + ) -> subprocess.CompletedProcess[str]: + del kwargs + return subprocess.CompletedProcess( + args=cmd, + returncode=128, + stdout="", + stderr="fatal: path 'scripts/x.txt' does not exist in the index", + ) + + monkeypatch.setattr(_MODULE.subprocess, "run", fake_run) + assert _MODULE._read_staged("scripts/x.txt") is None + + +def test_read_staged_returns_none_when_git_binary_missing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + def fake_run( + cmd: list[str], + **kwargs: object, + ) -> subprocess.CompletedProcess[str]: + del cmd, kwargs + msg = "git binary not on PATH" + raise FileNotFoundError(msg) + + monkeypatch.setattr(_MODULE.subprocess, "run", fake_run) + assert _MODULE._read_staged("scripts/x.txt") is None + + +def test_read_staged_warns_on_unexpected_oserror( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + def fake_run( + cmd: list[str], + **kwargs: object, + ) -> subprocess.CompletedProcess[str]: + del cmd, kwargs + msg = "git denied" + raise PermissionError(msg) + + monkeypatch.setattr(_MODULE.subprocess, "run", fake_run) + assert _MODULE._read_staged("scripts/some_baseline.txt") is None + captured = capsys.readouterr() + assert "git show" in captured.err + assert "some_baseline.txt" in captured.err + + +def test_read_staged_returns_blob_content( + monkeypatch: pytest.MonkeyPatch, +) -> None: + def fake_run( + cmd: list[str], + **kwargs: object, + ) -> subprocess.CompletedProcess[str]: + del kwargs + return subprocess.CompletedProcess( + args=cmd, + returncode=0, + stdout="src/a.py:1:1\nsrc/b.py:2:2\n", + stderr="", + ) + + monkeypatch.setattr(_MODULE.subprocess, "run", fake_run) + assert ( + _MODULE._read_staged("scripts/mock_spec_baseline.txt") + == "src/a.py:1:1\nsrc/b.py:2:2\n" + ) diff --git a/tests/unit/scripts/test_check_no_edit_baseline.py b/tests/unit/scripts/test_check_no_edit_baseline.py index 7975e82211..c3f6ebf60c 100644 --- a/tests/unit/scripts/test_check_no_edit_baseline.py +++ b/tests/unit/scripts/test_check_no_edit_baseline.py @@ -72,6 +72,20 @@ def test_blocks_edit_of_test_timing_baseline_posix() -> None: assert result.returncode == 2 +@_BASH_AVAILABLE +def test_blocks_edit_of_python_baseline_posix() -> None: + """Python-format gate baselines (``scripts/_*_baseline.py``) are protected too.""" + result = _run( + { + "tool_input": { + "file_path": "scripts/_workflow_shell_git_commits_baseline.py", + "new_string": "[]", + }, + }, + ) + assert result.returncode == 2 + + @_BASH_AVAILABLE def test_allows_non_baseline_path() -> None: result = _run( @@ -92,6 +106,7 @@ def test_allows_non_baseline_path() -> None: r"C:\repo\scripts\mock_spec_baseline.txt", r"scripts\mock_spec_baseline.txt", r"D:\projects\synthorg\scripts\_workflow_shell_git_commits_baseline.json", + r"D:\projects\synthorg\scripts\_workflow_shell_git_commits_baseline.py", r"C:\repo\tests\baselines\unit_timing.json", ], )