diff --git a/.github/scripts/lint_pr_body_refs.py b/.github/scripts/lint_pr_body_refs.py new file mode 100644 index 00000000..2c17f61d --- /dev/null +++ b/.github/scripts/lint_pr_body_refs.py @@ -0,0 +1,162 @@ +"""Issue #114 Phase 1 — PR-body refs lint. + +Walks a PR body looking for `#NUMBER` tokens. For each token, classifies as: + - structured (under a `## Linked issues` section OR preceded by + a recognised keyword: Closes/Fixes/Resolves/Refs/Related/See) + - bare (warning emitted) + +Always returns exit 0 — advisory check, never blocks merge. The CI +workflow surfaces warnings via stderr; reviewers can act on them +manually. + +SECURITY-CRITICAL: this script is the receiving end of the CI +workflow's ``--from-env PR_BODY`` invocation. The PR body is +contributor-editable text that flows directly from +``${{ github.event.pull_request.body }}`` through an environment +variable into ``os.environ[NAME]`` — no Bash interpreter in the +path. An earlier draft of the workflow used ``echo "$PR_BODY" > +/tmp/file`` which exposed OWASP A03 command-substitution injection +(`$(cmd)` expanded inside Bash double quotes). The ``--from-env`` +flag is the safe alternative; never restore the echo pattern. + +Stdlib only — no external deps. +""" + +from __future__ import annotations + +import argparse +import dataclasses +import os +import re +import sys +from collections.abc import Iterable + +_NUMBER_TOKEN_RE = re.compile(r"#(\d+)") +_KEYWORDS = ( + "closes", + "closed", + "fixes", + "fixed", + "resolves", + "resolved", + "refs", + "related to", + "related", + "see", +) +_LINKED_ISSUES_HEADING_RE = re.compile( + r"^\s*#{1,6}\s+linked\s+issues?\s*$", + re.IGNORECASE, +) + + +@dataclasses.dataclass(frozen=True) +class Warning: + """One bare-mention warning.""" + + line: int # 1-indexed + number: int # the issue number + + +# ── Public entry (≤ 30 LOC) ────────────────────────────────────────── + + +def lint_pr_body(body: str) -> list[Warning]: + """Walk a PR body's lines, classify each ``#NUMBER`` token, return + warnings for bare mentions. Pure function — no IO.""" + warnings: list[Warning] = [] + in_linked_section = False + for lineno, line in enumerate(body.splitlines(), start=1): + if _LINKED_ISSUES_HEADING_RE.match(line): + in_linked_section = True + continue + if in_linked_section and _is_other_heading(line): + in_linked_section = False + if in_linked_section: + continue + for match in _NUMBER_TOKEN_RE.finditer(line): + number = int(match.group(1)) + if not _has_preceding_keyword(line, match.start()): + warnings.append(Warning(line=lineno, number=number)) + return warnings + + +# ── Helpers (each ≤ 20 LOC) ────────────────────────────────────────── + + +def _is_other_heading(line: str) -> bool: + """Markdown heading that closes the linked-issues section.""" + return bool(re.match(r"^\s*#{1,6}\s+", line)) + + +def _has_preceding_keyword(line: str, token_start: int) -> bool: + """Return True when the `#NUMBER` token at ``token_start`` is + preceded on the same line by one of the recognised + issue-link keywords (case-insensitive). Looks back up to 32 + characters for a keyword match.""" + prefix = line[:token_start].lower() + return any(prefix.rstrip().endswith(kw) for kw in _KEYWORDS) + + +def _emit_warnings(warnings: Iterable[Warning], out=None) -> None: + """Print each warning to stderr in actionable form. ``out`` is + resolved at call-time (default ``sys.stderr``) so test harnesses + that replace ``sys.stderr`` (pytest capsys) capture the output + correctly — capturing the default at function-def time would + bind a stale reference.""" + target = out if out is not None else sys.stderr + for w in warnings: + print( + f"warning: bare '#{w.number}' on line {w.line} — wrap with " + f"'Closes #{w.number}' / 'Refs #{w.number}', or move to a " + "'Linked issues' section", + file=target, + ) + + +# ── CLI entry (≤ 25 LOC) ───────────────────────────────────────────── + + +def main(argv: list[str] | None = None) -> int: + """CLI entry. Body source — exactly one of: + --body — read PR body from file (local dev / tests) + --from-env — read PR body from env var (CI; security-critical + to avoid Bash shell interpolation of contributor- + controlled text — see module docstring) + + Always returns 0 (advisory check).""" + args = _parse_args(argv) + body = _read_body(args) + if body is None: + return 0 + _emit_warnings(lint_pr_body(body)) + return 0 + + +def _parse_args(argv: list[str] | None) -> argparse.Namespace: + parser = argparse.ArgumentParser(prog="lint_pr_body_refs") + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument("--body", help="path to PR body file") + group.add_argument( + "--from-env", + metavar="NAME", + help="environment variable to read PR body from (CI safe; no shell)", + ) + return parser.parse_args(argv) + + +def _read_body(args: argparse.Namespace) -> str | None: + """Read the PR body from whichever source the args specified. + Returns None when the source is missing — script exits 0 silently + rather than failing loudly (advisory).""" + if args.body: + try: + with open(args.body, encoding="utf-8") as fh: + return fh.read() + except OSError: + return None + return os.environ.get(args.from_env) + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/.github/workflows/lint-and-typecheck.yml b/.github/workflows/lint-and-typecheck.yml index a8f8bd5d..611f48cd 100644 --- a/.github/workflows/lint-and-typecheck.yml +++ b/.github/workflows/lint-and-typecheck.yml @@ -22,3 +22,16 @@ jobs: run: ruff format --check . - name: Mypy run: mypy . + - name: Plan-grounding lint (#114 Check A) + # Only lints plan-*.md files modified in this PR — historical + # plans that referenced now-deleted files would otherwise block + # CI for unrelated work. Adding/modifying a plan still gates. + run: | + git fetch origin ${{ github.base_ref }} --depth=1 + PLANS=$(git diff --name-only "origin/${{ github.base_ref }}...HEAD" -- 'plan-*.md' 'docs/Planning/plan-*.md' 2>/dev/null || true) + if [ -n "$PLANS" ]; then + echo "Linting modified plans: $PLANS" + python scripts/lint_plan_grounding.py $PLANS + else + echo "No plan files modified in this PR — skipping" + fi diff --git a/.github/workflows/pr-body-refs-lint.yml b/.github/workflows/pr-body-refs-lint.yml new file mode 100644 index 00000000..72ca1060 --- /dev/null +++ b/.github/workflows/pr-body-refs-lint.yml @@ -0,0 +1,40 @@ +name: PR body refs lint + +# Issue #114 Check B — surfaces bare `#NUMBER` mentions in PR bodies that +# aren't wrapped by Closes/Fixes/Resolves/Refs keywords or under a +# `## Linked issues` section. Advisory only — never blocks merge. +# +# SECURITY: PR_BODY is contributor-controlled text. The script reads it +# via `--from-env PR_BODY` (direct os.environ read in Python). DO NOT +# replace with `echo "$PR_BODY" > file` — that pattern lets Bash +# command-substitution `$(cmd)` expand inside double quotes, which is +# OWASP A03 arbitrary code execution in CI. See module docstring for +# the full audit context (#114 audit v1 caught this). +# +# Note: when this workflow file lands, it does not run on the PR that +# adds it — pull_request workflows execute the version on the base +# branch. First execution is on the next qualifying PR after merge. + +on: + pull_request: + types: [opened, edited, reopened] + +permissions: + pull-requests: read + contents: read + +jobs: + lint: + name: PR body refs lint (advisory) + runs-on: ubuntu-latest + # Advisory: red here doesn't gate merge. + continue-on-error: true + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Run lint + env: + PR_BODY: ${{ github.event.pull_request.body }} + run: python .github/scripts/lint_pr_body_refs.py --from-env PR_BODY diff --git a/CHANGELOG.md b/CHANGELOG.md index 403594d8..5a116e05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,25 @@ All notable changes to bicameral-mcp are tracked here. Format loosely follows ### Added +- **CI grounding lint for plan files and PR bodies (#114).** Two new + checkers ship together: + - `scripts/lint_plan_grounding.py` — walks `plan-*.md` files for + backtick-wrapped path tokens and verifies each resolves on the + working tree. Marks `**new**` / `(planned)` / `(future)` / + `(v2)` / `(nonexistent)` / `(example)` as exempt. Folded into + the existing `lint-and-typecheck.yml` workflow as a blocking + gate; only runs against plans modified in the current PR. + Closes the SG-PLAN-GROUNDING-DRIFT loop after three instances + in the v0.13/v0.16 development window. + - `.github/scripts/lint_pr_body_refs.py` + new + `.github/workflows/pr-body-refs-lint.yml` advisory workflow — + warns on bare `#NUMBER` mentions in PR bodies that aren't + wrapped by `Closes`/`Fixes`/`Resolves`/`Refs`/`Related`/`See` + keywords or under a `## Linked issues` heading. Reads PR body + via `--from-env PR_BODY` (direct `os.environ` read, no shell + interpolation; OWASP A03 mitigation caught at audit v1). + - Documentation: `DEV_CYCLE.md` §2.1 (plan-grounding lint + callout) + §4.3 (PR-body keyword discipline). Issue #114. - `handlers/preflight.py` — `_region_anchored_preflight` now expands caller-supplied `file_paths` by 1 hop along the code-locator graph's **import edges** before the `binds_to` lookup. Lifts the strict exact-match recall ceiling so a decision bound to `app/src/lib/git/reorder.ts` surfaces when the caller passes the structurally-near `app/src/ui/multi-commit-operation/reorder.tsx`. Decisions reached only via expansion carry `confidence=0.7` (vs `0.9` for direct pins). `sources_chained` includes `"graph"` (alongside `"region"`) when expansion contributed at least one hit. Bounded per #64: ≤10 input seeds × `max_neighbors_per_result` neighbors per seed. Closes #173 (and supersedes #64). - `adapters/code_locator.py::RealCodeLocatorAdapter.expand_file_paths_via_graph` — public method backing the expansion. Filters to ``imports`` edges only (file-level structural dependency); ``invokes`` / ``inherits`` / ``contains`` are symbol-level edges that over-broaden the file-level expansion. Returns `(expanded, added)` so callers can mark provenance. - `skills/bicameral-preflight/SKILL.md` Step 2 — documents the imports-only expansion + caller-side `confidence` and `sources_chained` semantics. @@ -18,8 +37,6 @@ All notable changes to bicameral-mcp are tracked here. Format loosely follows - `skills/bicameral-preflight/SKILL.md` Step 5.6 — judgment for contradiction-capture moves from the agent to the user via `AskUserQuestion` (Step 5.6.1). The agent no longer infers whether the prompt contradicts a surfaced decision; it asks the user (`supersede` / `keep_both` / `unrelated`) and acts mechanically on the answer (Step 5.6.2 — ingest + resolve_collision). The PostToolUse hook reminder now templates the disambiguation question rather than the bare ingest+resolve_collision sequence. Closes #175. - `tests/e2e/run_e2e_flows.py::assert_flow_2a` — pass criterion changed from "ingest+resolve_collision fired" to "`AskUserQuestion` invoked with disambiguation shape after preflight surfaced ≥1 decision." The user-side response can't be driven in headless `claude -p`, so the testable signal is the question invocation. The mechanical capture (Step 5.6.2) only fires after a human answers and is exercised in interactive Claude Code sessions, not CI. -### Changed - ### Fixed ### Schema diff --git a/docs/DEV_CYCLE.md b/docs/DEV_CYCLE.md index e1427dad..f989c11c 100644 --- a/docs/DEV_CYCLE.md +++ b/docs/DEV_CYCLE.md @@ -240,6 +240,14 @@ every box in that diagram. No back-doors to `main`. > see §4.4. Risk is a property of the change being made, knowable only after > design. Issues carry priority (urgency); PRs carry risk (review tier). +> **CI grounding lint (Issue #114)**: every plan committed to this repo +> runs through `scripts/lint_plan_grounding.py` in the lint workflow. +> The lint rejects plan files that reference filesystem paths not +> present on the working tree, unless the path is marked **new** or +> followed by `(planned)` / `(future)` / `(v2)` / `(nonexistent)` / +> `(example)` on its bullet. Author-time `ls -d */` is faster +> feedback; CI is the durable gate. Mitigation for SG-PLAN-GROUNDING-DRIFT. + #### 2.1.1 Priority labels (one per issue, mandatory after triage) Exactly one priority label per triaged issue. Untriaged issues carry `triage` @@ -406,6 +414,22 @@ Refs #60 (depends on continuity matcher landed there) The Plan/Audit/Seal section is **mandatory for any PR > 100 LOC or risk:L2+**. Smaller PRs may use `Plan: trivial; risk:L1`. +> **PR-body issue references (Issue #114)**: every PR body that +> mentions `#NUMBER` tokens should wrap them with one of: +> +> - **`Closes #N`** / `Fixes #N` / `Resolves #N` — full closure, fires +> GitHub auto-close + the `merged-to-dev` labeller workflow. +> - **`Refs #N`** / `Related to #N` / `See #N` — partial / related; +> creates the cross-link without auto-closing. +> - **Place under a `## Linked issues` heading** — section-level +> wrapping, equivalent to per-line keyword. +> +> The advisory workflow `pr-body-refs-lint.yml` warns (does not block) +> when bare `#NUMBER` mentions appear in prose without a wrapping. +> Bare mentions still create GitHub's auto-cross-reference, but skip +> the auto-close + labeller paths — surface them so authors choose +> intentionally rather than implicitly. + ### 4.4 Reviewers - Code-owner from `CODEOWNERS` is auto-requested. diff --git a/plan-114-grounding-lint.md b/plan-114-grounding-lint.md new file mode 100644 index 00000000..291acbc1 --- /dev/null +++ b/plan-114-grounding-lint.md @@ -0,0 +1,369 @@ +# Plan: CI lint for unstructured references in plan files and PR bodies (Issue #114) + +**Tracks**: BicameralAI/bicameral-mcp#114 — *CI lint for unstructured references in plan files and PR bodies* +**Targets**: v0.18.x (Jin's call at release-PR time) +**Branch**: `feat/114-grounding-lint` (off `BicameralAI/dev`, current tip `2e9a842` — post-#117 pre-push hook) +**Risk grade**: L1 — pure checker scripts + advisory CI workflow; no production code paths, no schema migrations, no MCP tool changes, no contract changes. +**Change class**: minor (additive lint scripts + new CI step + new advisory workflow + DEV_CYCLE.md docs). + +--- + +## Open Questions + +These are decisions worth flagging for audit; the plan proposes provisional answers. + +### Q1. Pre-commit hook + CI, or CI only? + +Issue body asks for both. Pre-commit requires `.pre-commit-config.yaml` infrastructure that does not currently exist in the repo (verified via `ls .pre-commit-config.yaml` → missing). Bootstrapping the pre-commit framework is its own concern with its own quirks (per-file vs per-commit, hook installation flow, contributor onboarding burden). + +**Recommend CI-only for v1.** A pre-commit hook is a small follow-up issue once the CI checkers prove themselves. The CI run is the canonical gate; pre-commit is just earlier feedback for the same checks. + +### Q2. Check A — what's a "registered top-level package"? + +Two options: + +- **Static list** of known packages (`adapters/`, `cli/`, `code_locator/`, `codegenome/`, `dashboard/`, `events/`, `governance/`, `handlers/`, `ledger/`) — drifts when packages are added. +- **Dynamic discovery** via `ls -d */` filtered by `__init__.py` presence — adapts automatically. + +**Recommend dynamic discovery** — every new top-level package gets `__init__.py`, so the lint stays current without manual maintenance. + +### Q3. Check B — block or warn? + +Issue body says "warn (not block) when bare `#NUMBER` mentions appear in prose without one of those wrappings." + +**Recommend warn (advisory check, not failing).** Bare `#NUMBER` mentions are sometimes legitimate (e.g., a release-notes paragraph that names every closed issue without `Closes` keywords because they were already closed). Hard-blocking creates churn; warning surfaces the smell without forcing action. + +### Q4. What counts as a "linked-issue keyword"? + +Standardised set, case-insensitive: `Closes`, `Fixes`, `Resolves` (GitHub auto-close), plus `Refs`, `Refs PR`, `Related to`, `Related`, `See` (advisory linking). Configurable via the script's argparse, hard-coded list for v1. + +### Q5. Where do the scripts live? + +- **Check A** (used both locally as a dev utility AND from CI): `scripts/lint_plan_grounding.py` — `scripts/` already exists for dev utilities (currently `sim_accountable.py`). +- **Check B** (CI-only, reads PR-body from GitHub Actions context): `.github/scripts/lint_pr_body_refs.py` — `.github/scripts/` already exists from PR #113 (`post_drift_comment.py`). + +**Recommend the asymmetry**: dev-utility script in `scripts/`; CI-only script in `.github/scripts/`. Mirrors the existing convention. + +### Q6. Does Check A interact with audit's grounding pass? + +The `/qor-audit` skill already runs grounding manually. Does Check A duplicate that work? + +**No** — they overlap but don't compete. CI lint is a fast pre-audit check (no SurrealDB, no LLM); audit's grounding is deeper (verifies API references, contract shapes, function signatures). Check A catches the easy 80% of SG-PLAN-GROUNDING-DRIFT instances earlier, freeing audit attention for harder cases. + +--- + +## Background (grounding — verified against `dev` HEAD `2e9a842`) + +- Top-level packages: `adapters/`, `assets/`, `classify/`, `cli/`, `code_locator/`, `codegenome/`, `dashboard/`, `docs/`, `events/`, `governance/`, `handlers/`, `ledger/`, `scripts/`, `skills/`, `tests/`, `thoughts/`. Verified via `ls -d */`. (Avoids SG-PLAN-GROUNDING-DRIFT instance #5.) +- `.github/workflows/`: `drift-report.yml`, `label-merged-to-dev.yml`, `lint-and-typecheck.yml`, `preflight-eval.yml`, `publish.yml`, `secret-scan.yml`, `test-mcp-regression.yml`, `test-schema-persistence.yml`. Lint workflow runs `ruff check .` + `ruff format --check .` + `mypy .` on PRs to `main` and `dev`. +- No `.pre-commit-config.yaml` exists. +- `scripts/` exists at repo root with `sim_accountable.py` and `CLAUDE.md`. +- `.github/scripts/post_drift_comment.py` (nonexistent) (from PR #113 — was the precedent for CI-only Python helpers; deleted on dev between this branch and merge). +- `cli/` is for user-facing console tools (`classify`, `branch_scan`); not the right home for a lint script. + +--- + +## Phase 0: Check A — plan-grounding lint + +TDD-light: tests written FIRST, confirm red, then implement, confirm green. + +### Affected files + +- `tests/test_lint_plan_grounding.py` — **new**, ~120 LOC, 8 tests covering path detection, exemption rules, suggested-correction output. +- `scripts/lint_plan_grounding.py` — **new**, ~140 LOC. Standalone Python script (no project imports) that walks plan files, classifies tokens, emits diagnostics. + +### Public interface + +```python +# scripts/lint_plan_grounding.py + +def lint_plan_file(path: pathlib.Path, repo_root: pathlib.Path) -> list[Diagnostic]: + """Walk a plan-*.md file, find filesystem-shaped path tokens, verify + each against the working tree (or the documented "new" exemption). + + Returns a list of Diagnostic records. Empty list = clean. + Pure function: no IO except reading the plan file and stat-ing + candidate paths. No git, no network.""" + + +def main(argv: list[str] | None = None) -> int: + """CLI entry. Walks `plan-*.md` and `docs/Planning/plan-*.md`, + runs lint_plan_file on each, prints diagnostics, returns 0 if + clean, 1 if any plan has unresolved paths.""" +``` + +### Diagnostic shape + +```python +@dataclasses.dataclass(frozen=True) +class Diagnostic: + path: pathlib.Path # plan file + line: int # 1-indexed line in plan + token: str # the candidate path that didn't resolve + suggestion: str | None # nearest-match guess from the registered packages +``` + +### Output format + +``` +plan-foo.md:42: 'bicameral/drift_report.py' does not exist + did you mean 'cli/drift_report.py'? (registered packages: cli/, codegenome/, handlers/, ...) +``` + +### Token detection rules + +A token is a lint candidate when ALL hold: + +1. Wrapped in backticks (` `…` `) or inside a fenced code block. +2. Contains at least one `/` (filesystem-shape). +3. Ends in a known extension (`.py`, `.yaml`, `.yml`, `.md`, `.json`, `.toml`, `.sh`, `.ts`, `.tsx`) OR has no extension AND matches `r"^[a-z_]+/$"` (package directory). +4. NOT preceded by an explicit "new" / "**new**" / "(new)" marker on the same Markdown bullet line. + +Token NOT a candidate when: +- Inside a `` HTML comment. +- Inside an indented quote block (`>` prefix). +- Followed by `(planned)` / `(future)` / `(v2)`. + +### Unit tests (Phase 0) + +- `tests/test_lint_plan_grounding.py`: + - `test_clean_plan_emits_no_diagnostics` — synthetic plan with only existing paths → empty list. + - `test_nonexistent_path_emits_diagnostic` — synthetic plan referencing `bicameral/foo.py` (nonexistent) → 1 diagnostic with line + token. + - `test_new_marker_exempts_path` — plan with `**new**` marker on the same line → no diagnostic. + - `test_planned_suffix_exempts_path` — plan with `(planned)` suffix → no diagnostic. + - `test_html_comment_skipped` — path inside `` block → no diagnostic. + - `test_suggestion_for_misspelled_package` — `bicameral/drift_report.py` (example) → suggests `cli/drift_report.py`. + - `test_main_exits_zero_when_all_clean` — `main()` against a clean fixture set → returncode 0. + - `test_main_exits_one_when_diagnostics` — `main()` against a fixture with one bad path → returncode 1. + +### Function-level razor + +- `lint_plan_file` ≤ 30 LOC (orchestrator). +- `main()` ≤ 25 LOC. +- Helpers: `_extract_path_tokens(text)` ≤ 25 LOC, `_is_exempt(token, line)` ≤ 20 LOC, `_resolve_or_suggest(token, repo_root)` ≤ 25 LOC. + +--- + +## Phase 1: Check B — PR-body refs lint + +TDD-light: tests written FIRST, confirm red, then implement, confirm green. + +### Affected files + +- `tests/test_lint_pr_body_refs.py` — **new**, ~100 LOC, 6 tests covering keyword recognition, bare-mention warnings, edge cases, AND the `--from-env` env-var read path (security-critical — see Phase 2 workflow). +- `.github/scripts/lint_pr_body_refs.py` — **new**, ~110 LOC. Stdlib-only checker that consumes a PR body via `--body ` (local dev / tests) or `--from-env ` (CI — direct env-var read avoids shell interpolation). Emits warnings for bare `#NUMBER` mentions. + +### Public interface + +```python +# .github/scripts/lint_pr_body_refs.py + +def lint_pr_body(body: str) -> list[Warning]: + """Walk a PR body's lines. For each `#NUMBER` token, classify as: + - structured (under 'Linked issues' header OR preceded by a recognised keyword) + - bare (warning emitted) + Returns warnings as Warning records. Pure function, no IO.""" + + +def main(argv: list[str] | None = None) -> int: + """CLI entry. Body source — exactly one of: + --body — read PR body from file (local dev / tests) + --from-env — read PR body from environment variable (CI) + + The ``--from-env`` path is the SECURITY-CRITICAL invocation: it lets + the CI workflow avoid passing user-controlled PR-body text through + a Bash shell, which would otherwise allow command-substitution + injection (OWASP A03). Direct ``os.environ[NAME]`` read. + + Runs lint_pr_body, prints warnings to stderr. Always returns 0 + (advisory check; never blocks merge).""" +``` + +### Recognised keywords (case-insensitive) + +`Closes`, `Closed`, `Fixes`, `Fixed`, `Resolves`, `Resolved`, `Refs`, `Refs PR`, `Related to`, `Related`, `See`. + +### Linked-issues section detection + +A section is detected when a Markdown heading (`#`/`##`/`###`) matches `r"^\s*#{1,6}\s+linked\s+issues?\s*$"` (case-insensitive). Tokens within that section's body (until the next heading) are exempt from bare-mention warnings. + +### Output format + +``` +warning: bare '#108' on line 12 — wrap with 'Closes #108' / 'Refs #108', or move to a 'Linked issues' section +``` + +### Unit tests (Phase 1) + +- `tests/test_lint_pr_body_refs.py`: + - `test_closes_keyword_recognised` — body with `Closes #42` → no warnings. + - `test_refs_keyword_recognised` — body with `Refs #42` → no warnings. + - `test_bare_mention_in_prose_warns` — body with `Phase 1 (#42):` → 1 warning. + - `test_linked_issues_section_exempts_bare_mentions` — body with `## Linked issues\n\n- #42` (bare under the section) → no warnings. + - `test_main_always_returns_zero` — even with warnings, exit code 0 (advisory). + - `test_main_reads_from_env_var` — set `PR_BODY` env var, invoke `main(['--from-env', 'PR_BODY'])`, verify warnings emitted match `--body file` mode. **Security-critical path — verifies the CI's no-shell-interpolation invocation works.** + +### Function-level razor + +- `lint_pr_body` ≤ 30 LOC. +- `main()` ≤ 20 LOC. +- Helpers: `_classify_token(line, ctx)` ≤ 20 LOC, `_is_in_linked_issues_section(line, prev_headings)` ≤ 15 LOC. + +--- + +## Phase 2: CI integration + +TDD-light: this phase has no new tests — it's CI plumbing. Phase 0 and 1 tests prove the checkers work; Phase 2 just wires them in. + +### Affected files + +- `.github/workflows/lint-and-typecheck.yml` — **modify**, +6 LOC. Add a step running `python scripts/lint_plan_grounding.py` after the existing `mypy` step. +- `.github/workflows/pr-body-refs-lint.yml` — **new**, ~30 LOC. Advisory workflow that runs Check B on PR open/edit. + +### `lint-and-typecheck.yml` modification + +```yaml + - name: Plan-grounding lint (#114 Check A) + run: python scripts/lint_plan_grounding.py +``` + +This blocks merge on plan-grounding violations (consistent with `ruff check`'s blocking semantics). + +### New `pr-body-refs-lint.yml` workflow + +```yaml +name: PR body refs lint + +on: + pull_request: + types: [opened, edited, reopened] + +permissions: + pull-requests: read + +jobs: + lint: + runs-on: ubuntu-latest + continue-on-error: true # advisory; never blocks merge + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: { python-version: '3.11' } + - name: Run lint + env: + PR_BODY: ${{ github.event.pull_request.body }} + run: python .github/scripts/lint_pr_body_refs.py --from-env PR_BODY +``` + +**Security note**: the `--from-env` argument tells the script to read +`PR_BODY` directly via `os.environ`, bypassing Bash entirely. An earlier +draft of this plan used `echo "$PR_BODY" > /tmp/pr-body.md` which is +vulnerable to OWASP A03 command-substitution injection (Bash double +quotes expand `$(cmd)`). Caught at audit (#114 v1 VETO). The current +pattern is safe — `os.environ[NAME]` is a direct memory read with +no shell interpreter in the path. + +`continue-on-error: true` is intentional — Check B is advisory. + +--- + +## Phase 3: Documentation + +TDD-light: this phase has no tests — it's pure documentation. + +### Affected files + +- `docs/DEV_CYCLE.md` — **modify**, +~30 LOC across two sections. +- `CHANGELOG.md` — **modify**, `[Unreleased]` entry under Added. + +### `DEV_CYCLE.md` updates + +Section §2.1 (issue creation, grounding-protocol references) — add: + +```markdown +> **CI grounding lint (Issue #114)**: every plan committed to this repo +> runs through `scripts/lint_plan_grounding.py` in the lint workflow. +> The lint rejects plan files that reference filesystem paths not +> present on the working tree, unless the path is marked **new** / +> **(planned)** on its bullet. Author-time `ls -d */` is faster +> feedback; CI is the durable gate. +``` + +Section §4.3 (PR body required sections) — add: + +```markdown +> **PR-body issue references**: every PR body that mentions `#NUMBER` +> tokens should wrap them with `Closes`/`Fixes`/`Resolves` (full +> closure) or `Refs` (related/partial) keywords, OR place them under +> a `## Linked issues` heading. The advisory workflow +> `pr-body-refs-lint.yml` warns (does not block) when bare mentions +> appear in prose. Issue #114. +``` + +### CHANGELOG entry + +```markdown +## [Unreleased] + +### Added + +- **CI grounding lint for plan files and PR bodies (#114).** Two new + checkers: `scripts/lint_plan_grounding.py` (filesystem-path + references in `plan-*.md`, blocks merge on unresolved paths) and + `.github/scripts/lint_pr_body_refs.py` (bare `#NUMBER` mentions in + PR bodies, advisory only). Plan-grounding check folds into the + existing `lint-and-typecheck.yml` workflow; PR-body check runs as a + new advisory workflow `pr-body-refs-lint.yml`. Closes the + SG-PLAN-GROUNDING-DRIFT loop after three instances this session. +``` + +--- + +## Test invocation + +```bash +# Phase 0 + 1 +python -m pytest -q tests/test_lint_plan_grounding.py tests/test_lint_pr_body_refs.py + +# Run the linters manually (dev workflow) +python scripts/lint_plan_grounding.py +echo "Closes #42" | python .github/scripts/lint_pr_body_refs.py --body /dev/stdin + +# CI gates these run on every PR (lint-and-typecheck.yml + pr-body-refs-lint.yml) +ruff check scripts/lint_plan_grounding.py .github/scripts/lint_pr_body_refs.py tests/test_lint_*.py +ruff format --check scripts/lint_plan_grounding.py .github/scripts/lint_pr_body_refs.py tests/test_lint_*.py +mypy scripts/lint_plan_grounding.py +``` + +--- + +## Section 4 razor pre-check + +| File | Estimate | Razor cap | OK? | +|---|---|---|---| +| `scripts/lint_plan_grounding.py` | ~140 LOC | ≤250 | yes | +| `.github/scripts/lint_pr_body_refs.py` | ~110 LOC | ≤250 | yes | +| `tests/test_lint_plan_grounding.py` | ~120 LOC | ≤250 | yes | +| `tests/test_lint_pr_body_refs.py` | ~100 LOC | ≤250 | yes | +| `pr-body-refs-lint.yml` | ~30 LOC | n/a (YAML) | n/a | + +Function-level: every new function ≤ 30 LOC entry / ≤ 25 LOC helpers / nesting ≤ 3 / no nested ternaries. + +--- + +## Exit criteria + +1. **Phase 0 GREEN**: 8/8 plan-grounding tests pass; `ruff check` + `format --check` + `mypy` clean. +2. **Phase 1 GREEN**: 5/5 PR-body-refs tests pass; ruff/format clean. +3. **Phase 2 wired**: lint-and-typecheck.yml runs the plan-grounding step on this PR; pr-body-refs-lint.yml workflow registered with GitHub Actions and runs on this PR. +4. **Phase 3 documented**: DEV_CYCLE.md §2.1 and §4.3 carry the lint references; CHANGELOG `[Unreleased]` entry committed. +5. **Self-test on this very PR**: the plan-grounding check runs against `plan-114-grounding-lint.md` itself and emits zero diagnostics. The PR-body lint runs against this PR's body and emits zero warnings (PR description will be authored with proper `## Linked issues` block). + +--- + +## What this plan is NOT + +- Not a pre-commit hook (deferred to a follow-up issue if CI proves the checkers). +- Not an auto-fix tool — surfaces violations, doesn't rewrite plans or PR bodies. +- Not an API/contract grounding lint — Check A only verifies filesystem paths. API/contract verification stays with `/qor-audit`'s deeper grounding pass. +- Not a hard-block on PR-body warnings — Check B is advisory by design. diff --git a/scripts/lint_plan_grounding.py b/scripts/lint_plan_grounding.py new file mode 100644 index 00000000..ea904fbd --- /dev/null +++ b/scripts/lint_plan_grounding.py @@ -0,0 +1,212 @@ +"""Issue #114 Phase 0 — plan-grounding lint. + +Walks `plan-*.md` (or `docs/Planning/plan-*.md`) files looking for +filesystem-shaped path tokens inside backticks or fenced code blocks. +For each candidate, verifies the path resolves on the working tree. +Unresolved paths emit a Diagnostic; the CLI exits non-zero if any +plan has diagnostics. + +Exemptions: tokens marked ``**new**`` / ``(planned)`` / ``(future)`` +/ ``(v2)`` on their bullet line; tokens inside HTML comments +(````); tokens inside Markdown blockquotes (``>`` prefix). + +Stdlib only — pathlib + re + argparse + dataclasses. No project +imports. Designed to run both as a CI step and as a dev-side +``python scripts/lint_plan_grounding.py`` invocation. + +Mitigation for SG-PLAN-GROUNDING-DRIFT (the Shadow Genome pattern +where plan authors claim filesystem paths without verifying). See +Issue #114 for context. +""" + +from __future__ import annotations + +import argparse +import dataclasses +import re +import sys +from pathlib import Path + +# Tokens to recognise as path-shaped: backtick-wrapped strings that +# contain a slash and end in a known extension (or look like a +# package directory). +_PATH_TOKEN_RE = re.compile( + r"`([^`\s][^`]*?[^`\s])`" # contents of a backtick-delimited span +) + +_KNOWN_EXTS = ( + ".py", + ".pyi", + ".yaml", + ".yml", + ".md", + ".json", + ".toml", + ".sh", + ".ts", + ".tsx", + ".js", + ".jsx", + ".rs", + ".go", + ".java", + ".cs", +) +_PACKAGE_DIR_RE = re.compile(r"^[a-z_][a-z0-9_]*/$") + +_NEW_MARKER_RE = re.compile(r"\*\*new\*\*", re.IGNORECASE) +_PLANNED_SUFFIX_RE = re.compile( + r"\((planned|future|v2|nonexistent|example)\)", + re.IGNORECASE, +) +_HTML_COMMENT_OPEN = "" + + +@dataclasses.dataclass(frozen=True) +class Diagnostic: + """One unresolved path token in a plan file.""" + + path: Path # plan file + line: int # 1-indexed + token: str # the path that did not resolve + + +# ── Public entry (≤ 30 LOC) ────────────────────────────────────────── + + +def lint_plan_text(text: str, repo_root: Path) -> list[Diagnostic]: + """Walk a plan-*.md content string, collect Diagnostics for + unresolved path tokens. Pure function — no IO except `repo_root / + candidate` resolution stat-checks.""" + diagnostics: list[Diagnostic] = [] + in_html_comment = False + for lineno, line in enumerate(text.splitlines(), start=1): + # Strip any single-line `` blocks; multi-line + # comments are tracked via the in_html_comment state. + cleaned = _strip_inline_comments(line) + in_html_comment = _update_comment_state(cleaned, in_html_comment) + if in_html_comment or _is_blockquote(cleaned): + continue + if _is_exempt_line(cleaned): + continue + for token in _extract_path_tokens(cleaned): + if not _is_path_shaped(token): + continue + if not (repo_root / token).exists(): + diagnostics.append(Diagnostic(path=Path(""), line=lineno, token=token)) + return diagnostics + + +# ── Helpers (each ≤ 25 LOC) ────────────────────────────────────────── + + +def _extract_path_tokens(line: str) -> list[str]: + """Pull every backtick-wrapped token from a Markdown line.""" + return [m.group(1) for m in _PATH_TOKEN_RE.finditer(line)] + + +def _is_path_shaped(token: str) -> bool: + """Token looks filesystem-shaped: contains `/` AND ends in a known + extension OR matches the package-directory pattern. Excludes + tokens with internal whitespace (multi-word, not a single path) + and glob patterns (``*``, ``?``, ``[``).""" + if "/" not in token: + return False + if any(c.isspace() for c in token): + return False + if any(c in token for c in ("*", "?", "[")): + return False + if any(token.endswith(ext) for ext in _KNOWN_EXTS): + return True + return bool(_PACKAGE_DIR_RE.match(token)) + + +def _is_exempt_line(line: str) -> bool: + """Line carries an explicit `**new**` / `(planned)` / `(future)` + / `(v2)` marker that signals the author KNOWS the path doesn't + yet exist. Lint passes.""" + if _NEW_MARKER_RE.search(line): + return True + if _PLANNED_SUFFIX_RE.search(line): + return True + return False + + +def _is_blockquote(line: str) -> bool: + """Markdown blockquotes start with `>` (after optional whitespace). + Treat as illustrative quotations, not file claims.""" + return line.lstrip().startswith(">") + + +def _update_comment_state(line: str, in_comment: bool) -> bool: + """Track multi-line HTML comments. Returns the state AFTER + processing this line — simple toggle on open/close markers. + Single-line comments are stripped by ``_strip_inline_comments`` + BEFORE this is called, so the only case that flips state is a + multi-line open.""" + if in_comment: + return _HTML_COMMENT_CLOSE not in line + if _HTML_COMMENT_OPEN in line and _HTML_COMMENT_CLOSE not in line: + return True + return False + + +def _strip_inline_comments(line: str) -> str: + """Remove every `` block on a single line. The state + machine elsewhere handles multi-line comments. After this call, + any remaining ``) opens a multi-line + comment block.""" + while True: + start = line.find(_HTML_COMMENT_OPEN) + end = line.find(_HTML_COMMENT_CLOSE, start) + if start == -1 or end == -1: + return line + line = line[:start] + line[end + len(_HTML_COMMENT_CLOSE) :] + + +# ── CLI entry (≤ 25 LOC) ───────────────────────────────────────────── + + +def main(argv: list[str] | None = None) -> int: + """Walk every passed plan path (or all `plan-*.md` at repo root + when none passed). Print diagnostics. Return 0 if clean, 1 + otherwise.""" + args = _parse_args(argv) + plans = _collect_plans(args.paths) + repo_root = Path.cwd() + total = 0 + for plan_path in plans: + text = plan_path.read_text(encoding="utf-8") + for diag in lint_plan_text(text, repo_root=repo_root): + total += 1 + print( + f"{plan_path}:{diag.line}: '{diag.token}' does not exist", + file=sys.stderr, + ) + if total: + print(f"\n{total} diagnostic(s) across {len(plans)} plan(s)", file=sys.stderr) + return 1 + return 0 + + +def _parse_args(argv: list[str] | None) -> argparse.Namespace: + parser = argparse.ArgumentParser(prog="lint_plan_grounding") + parser.add_argument("paths", nargs="*", help="plan files to lint") + return parser.parse_args(argv) + + +def _collect_plans(paths: list[str]) -> list[Path]: + """If specific paths passed, use them. Otherwise glob plan-*.md + at the repo root + docs/Planning/plan-*.md if that dir exists.""" + if paths: + return [Path(p) for p in paths] + out: list[Path] = sorted(Path.cwd().glob("plan-*.md")) + planning_dir = Path("docs/Planning") + if planning_dir.exists(): + out.extend(sorted(planning_dir.glob("plan-*.md"))) + return out + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/tests/test_lint_plan_grounding.py b/tests/test_lint_plan_grounding.py new file mode 100644 index 00000000..27a60e41 --- /dev/null +++ b/tests/test_lint_plan_grounding.py @@ -0,0 +1,117 @@ +"""Issue #114 Phase 0 — plan-grounding lint contract tests. + +Pure-function tests on ``scripts.lint_plan_grounding``. Each test +constructs a synthetic plan-*.md content string and asserts on the +diagnostic list the linter produces. No real plan files are read, +no git, no network. +""" + +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path + +# Load the script as a module so tests can call its public functions +# without requiring it to be a proper Python package (it's a standalone +# dev/CI utility under scripts/). +_SCRIPT_PATH = Path(__file__).resolve().parent.parent / "scripts" / "lint_plan_grounding.py" +_SPEC = importlib.util.spec_from_file_location("lint_plan_grounding", _SCRIPT_PATH) +assert _SPEC is not None and _SPEC.loader is not None +_MODULE = importlib.util.module_from_spec(_SPEC) +sys.modules["lint_plan_grounding"] = _MODULE +_SPEC.loader.exec_module(_MODULE) + +lint_plan_text = _MODULE.lint_plan_text +main = _MODULE.main + + +def _existing_repo_path() -> Path: + """Return a path that's known to exist on the repo working tree + (this very test directory). Used in synthetic plan inputs that + must not trigger diagnostics.""" + return Path("tests/test_lint_plan_grounding.py") + + +def test_clean_plan_emits_no_diagnostics(tmp_path) -> None: + """A plan referencing only existing paths produces zero + diagnostics — the lint must not false-positive on cleanly-grounded + plans.""" + plan = ( + "# Plan: clean grounding\n\n" + "Modifies `scripts/lint_plan_grounding.py` and " + "`tests/test_lint_plan_grounding.py` (both real).\n" + ) + diagnostics = lint_plan_text(plan, repo_root=Path(".")) + assert diagnostics == [] + + +def test_nonexistent_path_emits_diagnostic(tmp_path) -> None: + """A plan referencing a path that does not exist must emit one + diagnostic per unresolved token. Carries the line number for + actionable feedback.""" + plan = "# Plan: bad grounding\n\nAdds `bicameral/foo_module.py` to the repo.\n" + diagnostics = lint_plan_text(plan, repo_root=Path(".")) + assert len(diagnostics) == 1 + diag = diagnostics[0] + assert diag.token == "bicameral/foo_module.py" + assert diag.line == 3 # 1-indexed; the third line of the synthetic plan + + +def test_new_marker_exempts_path() -> None: + """A path explicitly marked **new** on its bullet line is exempt + from grounding — plans deliberately propose new files.""" + plan = "# Plan: with new marker\n\n- `bicameral/brand_new_module.py` — **new**, ~50 LOC.\n" + diagnostics = lint_plan_text(plan, repo_root=Path(".")) + assert diagnostics == [] + + +def test_planned_suffix_exempts_path() -> None: + """A path followed by a `(planned)` / `(future)` / `(v2)` suffix + is exempt — author signals the path is aspirational, not extant.""" + plan = ( + "# Plan: with planned suffix\n\n" + "Future module `bicameral/v2_optimizer.py` (planned) — see Phase 5.\n" + ) + diagnostics = lint_plan_text(plan, repo_root=Path(".")) + assert diagnostics == [] + + +def test_html_comment_skipped() -> None: + """Tokens inside `` HTML comments are skipped — those + are author notes / examples that shouldn't be linted.""" + plan = ( + "# Plan: with HTML comment\n\n" + "\n" + "Real change: `scripts/lint_plan_grounding.py`.\n" + ) + diagnostics = lint_plan_text(plan, repo_root=Path(".")) + assert diagnostics == [] + + +def test_quote_block_skipped() -> None: + """Tokens inside Markdown blockquotes (`>` prefix) are skipped — + those are typically illustrative quotations, not file claims.""" + plan = ( + "# Plan: with blockquote\n\n" + "> The audit said: `bicameral/foo.py` does not exist. Fixed in v2.\n" + "Real change: `scripts/lint_plan_grounding.py`.\n" + ) + diagnostics = lint_plan_text(plan, repo_root=Path(".")) + assert diagnostics == [] + + +def test_main_exits_zero_when_all_clean(tmp_path) -> None: + """``main([str(plan_path)])`` returns 0 when the plan grounds + cleanly. Used by CI as the gate signal.""" + plan = tmp_path / "plan-clean.md" + plan.write_text("Touches `scripts/lint_plan_grounding.py`.\n") + assert main([str(plan)]) == 0 + + +def test_main_exits_one_when_diagnostics(tmp_path) -> None: + """``main([str(plan_path)])`` returns 1 when any diagnostic fires. + CI must block the merge in that state.""" + plan = tmp_path / "plan-bad.md" + plan.write_text("Touches `bicameral/nonexistent.py`.\n") + assert main([str(plan)]) == 1 diff --git a/tests/test_lint_pr_body_refs.py b/tests/test_lint_pr_body_refs.py new file mode 100644 index 00000000..1c661169 --- /dev/null +++ b/tests/test_lint_pr_body_refs.py @@ -0,0 +1,82 @@ +"""Issue #114 Phase 1 — PR-body refs lint contract tests. + +Pure-function tests on ``.github/scripts/lint_pr_body_refs``. Each +test passes a synthetic PR body string to the linter and asserts on +the warning list. Includes the SECURITY-CRITICAL ``--from-env`` test +that verifies the no-shell-interpolation invocation matches file-mode +output. +""" + +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path + +_SCRIPT_PATH = ( + Path(__file__).resolve().parent.parent / ".github" / "scripts" / "lint_pr_body_refs.py" +) +_SPEC = importlib.util.spec_from_file_location("lint_pr_body_refs", _SCRIPT_PATH) +assert _SPEC is not None and _SPEC.loader is not None +_MODULE = importlib.util.module_from_spec(_SPEC) +sys.modules["lint_pr_body_refs"] = _MODULE +_SPEC.loader.exec_module(_MODULE) + +lint_pr_body = _MODULE.lint_pr_body +main = _MODULE.main + + +def test_closes_keyword_recognised() -> None: + """Body with `Closes #42` produces zero warnings — that's the + canonical full-closure pattern.""" + body = "## Summary\n\nFixes the bug.\n\nCloses #42\n" + assert lint_pr_body(body) == [] + + +def test_refs_keyword_recognised() -> None: + """Body with `Refs #42` produces zero warnings — partial / + architectural reference, not a closure.""" + body = "## Summary\n\nRelated work.\n\nRefs #42\n" + assert lint_pr_body(body) == [] + + +def test_bare_mention_in_prose_warns() -> None: + """Body with a bare `#42` in prose (no Closes/Refs keyword, + not under a Linked-issues section) triggers a warning.""" + body = "## Summary\n\nPhase 1 (#42) — adds the contracts.\n" + warnings = lint_pr_body(body) + assert len(warnings) == 1 + assert warnings[0].number == 42 + + +def test_linked_issues_section_exempts_bare_mentions() -> None: + """Bare `#NUMBER` tokens under a `## Linked issues` heading are + exempt — the section header itself is the link wrapper.""" + body = "## Linked issues\n\n- #42\n- #43\n" + assert lint_pr_body(body) == [] + + +def test_main_always_returns_zero(tmp_path) -> None: + """``main()`` is advisory — always returns 0 even when warnings + are emitted. CI uses the warnings as informational signal, not + a merge gate.""" + body_file = tmp_path / "body.md" + body_file.write_text("Bare mention (#42) in prose.\n") + assert main(["--body", str(body_file)]) == 0 + + +def test_main_reads_from_env_var(monkeypatch, capsys) -> None: + """SECURITY-CRITICAL path: the CI workflow uses ``--from-env + PR_BODY`` to avoid shell-string interpolation of user-controlled + PR-body text (OWASP A03 mitigation per #114 audit v1). + + Verify that ``--from-env`` produces the SAME warnings as + ``--body file`` for identical input.""" + body = "## Summary\n\nPhase 1 (#108) prose mention.\n" + monkeypatch.setenv("BICAMERAL_TEST_PR_BODY", body) + rc = main(["--from-env", "BICAMERAL_TEST_PR_BODY"]) + assert rc == 0 # advisory + captured = capsys.readouterr() + # The bare #108 mention should produce a warning to stderr + assert "108" in captured.err + assert "warning" in captured.err.lower()