diff --git a/tools/hygiene/audit-rule-cross-refs.test.ts b/tools/hygiene/audit-rule-cross-refs.test.ts new file mode 100644 index 0000000000..e2257d93ff --- /dev/null +++ b/tools/hygiene/audit-rule-cross-refs.test.ts @@ -0,0 +1,132 @@ +// audit-rule-cross-refs.test.ts — basic correctness tests for the rule cross-ref auditor. +// +// Tests the pure pull/exists/render functions. The audit() integration +// is exercised end-to-end via CLI run in the shard PR's verify step. + +import { describe, expect, test } from "bun:test"; +import { pullRefs, refExists, renderReport } from "./audit-rule-cross-refs.ts"; + +describe("pullRefs", () => { + test("pulls backtick'd .md path references", () => { + const content = "See `memory/feedback_example.md` for details."; + const refs = pullRefs(content, "test.md"); + expect(refs).toHaveLength(1); + expect(refs[0]!.raw).toBe("memory/feedback_example.md"); + expect(refs[0]!.kind).toBe("path"); + }); + + test("pulls backtick'd .ts path references", () => { + const content = "Wire via `tools/peer-call/claude.ts`"; + const refs = pullRefs(content, "test.md"); + expect(refs).toHaveLength(1); + expect(refs[0]!.raw).toBe("tools/peer-call/claude.ts"); + }); + + test("pulls B-NNNN backlog ID references", () => { + const content = "Composes with B-0192 and B-0506."; + const refs = pullRefs(content, "test.md"); + expect(refs).toHaveLength(2); + expect(refs.map((r) => r.raw).sort()).toEqual(["B-0192", "B-0506"]); + expect(refs.every((r) => r.kind === "backlog-id")).toBe(true); + }); + + test("skips placeholders containing < or $", () => { + const content = "Use `.md` template or `$VAR.ts`"; + const refs = pullRefs(content, "test.md"); + expect(refs).toHaveLength(0); + }); + + test("dedups repeated references in same file", () => { + const content = "See `foo.md` ... then `foo.md` again and B-0001 twice: B-0001"; + const refs = pullRefs(content, "test.md"); + expect(refs).toHaveLength(2); + expect(refs.map((r) => r.raw).sort()).toEqual(["B-0001", "foo.md"]); + }); +}); + +describe("refExists", () => { + test("returns true for an existing file", () => { + // CLAUDE.md is at the repo root and exists in any healthy checkout. + expect(refExists({ fromRule: "test.md", raw: "CLAUDE.md", kind: "path" })).toBe(true); + }); + + test("returns false for a missing file", () => { + expect(refExists({ fromRule: "test.md", raw: "definitely-does-not-exist-xyz.md", kind: "path" })).toBe(false); + }); + + test("resolves a real backlog ID via dir scan", () => { + // B-0506 was filed earlier today; should resolve. + expect(refExists({ fromRule: "test.md", raw: "B-0506", kind: "backlog-id" })).toBe(true); + }); + + test("returns false for a non-existent backlog ID", () => { + expect(refExists({ fromRule: "test.md", raw: "B-9999", kind: "backlog-id" })).toBe(false); + }); + + test("resolves a glob pattern when at least one match exists", () => { + // memory/feedback_*.md will match many files. + expect(refExists({ fromRule: "test.md", raw: "memory/feedback_*.md", kind: "path" })).toBe(true); + }); + + test("resolves a glob with wildcard in a directory segment (Codex P2 catch on PR #3202)", () => { + // docs/backlog/P*/B-*.md is referenced from backlog-item-start-gate.md. + // Earlier implementation treated the substring before the last "/" as a + // literal directory; this regression test covers the directory-segment wildcard. + expect(refExists({ fromRule: "test.md", raw: "docs/backlog/P*/B-*.md", kind: "path" })).toBe(true); + }); + + test("returns false for a glob that matches no files in any segment", () => { + expect(refExists({ fromRule: "test.md", raw: "no-such-dir-*/nothing-*.md", kind: "path" })).toBe(false); + }); + + test("resolves brace-expansion patterns (Codex P2 re-review on PR #3202)", () => { + // lost-files-surface.md references feedback_rule_number_{one,two,three,...}_*aaron_*.md. + // Brace globs should expand to alternatives, each tested as a star-glob. + expect( + refExists({ + fromRule: "test.md", + raw: "memory/feedback_rule_number_{one,two,three,four,five,six,seven}_*aaron_2026_05_05.md", + kind: "path", + }), + ).toBe(true); + }); + + test("returns false for brace pattern where no alternative matches", () => { + expect( + refExists({ + fromRule: "test.md", + raw: "no-such-dir/{alpha,beta,gamma}-no-match.md", + kind: "path", + }), + ).toBe(false); + }); +}); + +describe("renderReport", () => { + test("renders a no-candidates report", () => { + const fixed = new Date("2026-05-14T00:00:00Z"); + const md = renderReport( + { rulesScanned: 10, refsFound: 50, candidatesStale: [], resolvedCount: 50 }, + fixed, + ); + expect(md).toContain("Rules scanned: 10"); + expect(md).toContain("Resolved: 50"); + expect(md).toContain("Stale-pointer candidates: 0"); + expect(md).toContain("_None — all references resolve._"); + }); + + test("renders a candidates table", () => { + const fixed = new Date("2026-05-14T00:00:00Z"); + const md = renderReport( + { + rulesScanned: 1, + refsFound: 1, + resolvedCount: 0, + candidatesStale: [{ fromRule: "rule.md", raw: "missing.md", kind: "path" }], + }, + fixed, + ); + expect(md).toContain("| Rule | Kind | Reference |"); + expect(md).toContain("| `rule.md` | path | `missing.md` |"); + }); +}); diff --git a/tools/hygiene/audit-rule-cross-refs.ts b/tools/hygiene/audit-rule-cross-refs.ts new file mode 100644 index 0000000000..7457a7b078 --- /dev/null +++ b/tools/hygiene/audit-rule-cross-refs.ts @@ -0,0 +1,331 @@ +#!/usr/bin/env bun +// audit-rule-cross-refs.ts — detect stale-pointer candidates in `.claude/rules/*.md` +// +// Mechanizes the razor-cadence item 4 (composes-with audit) work that landed +// manually across 12 ticks on 2026-05-14 (50/50 rules, 217/218 LIVE, 1 finding +// captured by B-0514). See the cumulative final shard at +// `docs/hygiene-history/ticks/2026/05/14/1920Z.md` for context. +// +// Scope (first slice — mechanical Layer A only): +// +// - Scan all `.claude/rules/*.md` files +// - Pull backtick'd path references (`.md`, `.ts`, etc.) +// - Pull backlog ID references (B-NNNN) +// - Test existence via direct path + glob + per-row backlog file lookup +// - Report stale-pointer CANDIDATES (failed existence) +// +// Out of scope (Layer B — semantic classification): +// +// The 9-variant reference-classification taxonomy from the 12-batch manual +// audit (concrete | glob | template-path | backlog-ID | legacy-noted | +// transient | anti-pattern | conditional | alternative-location) requires +// reading the rule's prose context around each reference. ~5% of MISSes are +// rule-acknowledged-not-exists (healthy). This tool produces the *candidate +// list* — human / Otto judgment classifies each candidate's variant. +// +// See `docs/hygiene-history/ticks/2026/05/14/1920Z.md` for the full taxonomy. +// +// Usage: +// +// bun tools/hygiene/audit-rule-cross-refs.ts # detect-only, exit 0 always +// bun tools/hygiene/audit-rule-cross-refs.ts --report PATH # write markdown report +// +// Exit codes: +// +// 0 always (detect-only; no enforcement; humans triage stale-pointer candidates) +// 64 argument error +// +// DST-friendliness: +// +// Read-only audit. The "Generated" timestamp in markdown reports is the only +// non-deterministic surface. Per `typescript.md` universal-DST gate. + +import { existsSync, readFileSync, readdirSync, statSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; + +const RULES_DIR = ".claude/rules"; +const BACKLOG_DIR = "docs/backlog"; + +type AuditExitCode = 0 | 64; + +interface Args { + readonly report: string | null; +} + +interface Ref { + readonly fromRule: string; + readonly raw: string; + readonly kind: "path" | "backlog-id"; +} + +interface AuditResult { + readonly rulesScanned: number; + readonly refsFound: number; + readonly candidatesStale: Ref[]; + readonly resolvedCount: number; +} + +function parseArgs(argv: string[]): { kind: "args"; args: Args } | { kind: "error"; message: string } { + let report: string | null = null; + let i = 0; + while (i < argv.length) { + const a = argv[i]!; + if (a === "--report") { + const next = argv[i + 1]; + if (!next) return { kind: "error", message: "--report requires a path" }; + report = next; + i += 2; + } else { + return { kind: "error", message: `Unknown argument: ${a}` }; + } + } + return { kind: "args", args: { report } }; +} + +function pullRefs(content: string, ruleFile: string): Ref[] { + const refs: Ref[] = []; + + // Backtick'd path references + const pathPattern = /`([^`]+\.(md|ts|sh|fs|fsi|cs|yml|json))`/g; + let m: RegExpExecArray | null; + while ((m = pathPattern.exec(content)) !== null) { + const raw = m[1]!; + if (raw.includes("<") || raw.includes("$")) continue; + refs.push({ fromRule: ruleFile, raw, kind: "path" }); + } + + // Backlog ID references (B-NNNN) + const idPattern = /\bB-([0-9]{4})\b/g; + while ((m = idPattern.exec(content)) !== null) { + refs.push({ fromRule: ruleFile, raw: `B-${m[1]!}`, kind: "backlog-id" }); + } + + // Dedup + const seen = new Set(); + return refs.filter((r) => { + const key = `${r.kind}:${r.raw}`; + if (seen.has(key)) return false; + seen.add(key); + return true; + }); +} + +// Escape regex metacharacters except `*`, which we replace with `.*` afterwards. +// Explicitly escapes backslash too (per CodeQL 81 finding). +function escapeForGlobRegex(s: string): string { + return s + .replace(/[\\^$+?.()|[\]{}]/g, "\\$&") + .replace(/\*/g, ".*"); +} + +// Expand bash-style brace globs (`{a,b,c}` → 3 alternatives). Caught by Codex +// P2 thread on PR #3202: rules like lost-files-surface.md use brace-expansion +// patterns (e.g., `feedback_rule_number_{one,two,three}_*aaron_*.md`) that +// don't match if `{` and `}` are escaped literally. +// +// Expands the FIRST brace group encountered, then recurses on each expansion. +// Most patterns have only one brace group; nested braces are uncommon in +// practice. +function expandBraces(pattern: string): string[] { + const idx = pattern.indexOf("{"); + if (idx === -1) return [pattern]; + let depth = 0; + let close = -1; + for (let i = idx; i < pattern.length; i++) { + if (pattern[i] === "{") depth++; + else if (pattern[i] === "}") { + depth--; + if (depth === 0) { + close = i; + break; + } + } + } + if (close === -1) return [pattern]; + const before = pattern.slice(0, idx); + const after = pattern.slice(close + 1); + const body = pattern.slice(idx + 1, close); + const alternatives = body.split(","); + const results: string[] = []; + for (const alt of alternatives) { + const expanded = before + alt + after; + for (const sub of expandBraces(expanded)) results.push(sub); + } + return results; +} + +// Resolve a path that may contain `*` wildcards in any segment (not only the +// last). Walks segment-by-segment from the leftmost directory: wildcards in +// earlier segments expand to directory listings, wildcards in the final segment +// match file basenames. Caught by Codex P2 thread on PR #3202. Brace-glob +// support added in second iteration per Codex re-review. +function globResolvesSingle(pattern: string): boolean { + const segments = pattern.split("/"); + let candidates: string[] = pattern.startsWith("/") ? ["/"] : ["."]; + for (let i = 0; i < segments.length; i++) { + const seg = segments[i]!; + if (i === 0 && pattern.startsWith("/")) continue; + if (seg === "") continue; + const isLast = i === segments.length - 1; + const hasWild = seg.includes("*"); + const next: string[] = []; + for (const base of candidates) { + if (!hasWild) { + const child = base === "." ? seg : `${base}/${seg}`; + if (isLast) { + if (existsSync(child)) return true; + } else if (existsSync(child) && statSync(child).isDirectory()) { + next.push(child); + } + continue; + } + if (!existsSync(base) || !statSync(base).isDirectory()) continue; + const regex = new RegExp("^" + escapeForGlobRegex(seg) + "$"); + let entries: string[]; + try { + entries = readdirSync(base); + } catch { + continue; + } + for (const entry of entries) { + if (!regex.test(entry)) continue; + const child = base === "." ? entry : `${base}/${entry}`; + if (isLast) { + if (existsSync(child)) return true; + } else if (existsSync(child) && statSync(child).isDirectory()) { + next.push(child); + } + } + } + candidates = next; + if (candidates.length === 0 && !isLast) return false; + } + return false; +} + +// Public globResolves: expand braces first, then check each expansion via the +// star-only single resolver. Returns true if ANY expansion has a match. +function globResolves(pattern: string): boolean { + for (const expanded of expandBraces(pattern)) { + if (existsSync(expanded)) return true; + if (expanded.includes("*") && globResolvesSingle(expanded)) return true; + } + return false; +} + +function refExists(ref: Ref): boolean { + if (ref.kind === "path") { + if (existsSync(ref.raw)) return true; + if (ref.raw.includes("*") || ref.raw.includes("{")) return globResolves(ref.raw); + return false; + } + if (ref.kind === "backlog-id") { + const match = ref.raw.match(/B-(\d{4})/); + if (!match) return false; + const id = match[1]!; + if (!existsSync(BACKLOG_DIR)) return false; + for (const p of ["P0", "P1", "P2", "P3"]) { + const dir = join(BACKLOG_DIR, p); + if (!existsSync(dir)) continue; + const files = readdirSync(dir); + if (files.some((f) => f.startsWith(`B-${id}-`))) return true; + } + return false; + } + return false; +} + +function audit(rulesDir: string): AuditResult { + if (!existsSync(rulesDir)) { + return { rulesScanned: 0, refsFound: 0, candidatesStale: [], resolvedCount: 0 }; + } + + const ruleFiles = readdirSync(rulesDir) + .filter((f) => f.endsWith(".md")) + .sort(); + + let refsFound = 0; + let resolvedCount = 0; + const candidatesStale: Ref[] = []; + + for (const ruleFile of ruleFiles) { + const content = readFileSync(join(rulesDir, ruleFile), "utf8"); + const refs = pullRefs(content, ruleFile); + refsFound += refs.length; + for (const ref of refs) { + if (refExists(ref)) { + resolvedCount++; + } else { + candidatesStale.push(ref); + } + } + } + + return { + rulesScanned: ruleFiles.length, + refsFound, + candidatesStale, + resolvedCount, + }; +} + +function renderReport(result: AuditResult, now: Date): string { + const lines: string[] = []; + lines.push("# `.claude/rules/` cross-reference audit"); + lines.push(""); + lines.push(`Generated: ${now.toISOString()}`); + lines.push(""); + lines.push("## Summary"); + lines.push(""); + lines.push(`- Rules scanned: ${result.rulesScanned}`); + lines.push(`- References pulled: ${result.refsFound}`); + lines.push(`- Resolved: ${result.resolvedCount}`); + lines.push(`- Stale-pointer candidates: ${result.candidatesStale.length}`); + lines.push(""); + lines.push("## 9-variant taxonomy reminder"); + lines.push(""); + lines.push("Candidates listed below failed direct path / glob / backlog-row lookup. Apply the"); + lines.push("9-variant taxonomy from `docs/hygiene-history/ticks/2026/05/14/1920Z.md` to classify"); + lines.push("each candidate before fixing — ~5% are healthy MISSes (rule-acknowledged-transient,"); + lines.push("conditional, alternative-location, legacy-noted, anti-pattern)."); + lines.push(""); + lines.push("## Candidates"); + lines.push(""); + if (result.candidatesStale.length === 0) { + lines.push("_None — all references resolve._"); + } else { + lines.push("| Rule | Kind | Reference |"); + lines.push("|------|------|-----------|"); + for (const c of result.candidatesStale) { + lines.push(`| \`${c.fromRule}\` | ${c.kind} | \`${c.raw}\` |`); + } + } + lines.push(""); + return lines.join("\n"); +} + +function main(argv: string[]): AuditExitCode { + const parsed = parseArgs(argv); + if (parsed.kind === "error") { + console.error(`error: ${parsed.message}`); + return 64; + } + + const result = audit(RULES_DIR); + const report = renderReport(result, new Date()); + + if (parsed.args.report) { + writeFileSync(parsed.args.report, report); + console.log(`wrote ${parsed.args.report}`); + } else { + console.log(report); + } + + return 0; +} + +if (import.meta.main) { + process.exit(main(process.argv.slice(2))); +} + +export { audit, globResolves, pullRefs, refExists, renderReport };