From d68e58084ea721b6e2ceb47096953ecb3e8c7ed7 Mon Sep 17 00:00:00 2001 From: Aaron Stainback Date: Sat, 2 May 2026 12:53:40 -0400 Subject: [PATCH] =?UTF-8?q?tools(hygiene):=20B-0162=20=E2=80=94=20role-ref?= =?UTF-8?q?=20check=20for=20current-state=20surfaces=20(soft-launch=20by?= =?UTF-8?q?=20default)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the pre-commit hook from B-0162 (P1 backlog row; 5 catches on PR #1202 alone past mechanization breakeven). Script: tools/hygiene/check-role-ref-on-current-state-surfaces.sh Behavior: - Scans closed-list current-state surfaces (CLAUDE.md, AGENTS.md, GOVERNANCE.md, ALIGNMENT.md, CONFLICT-RESOLUTION.md, AGENT-BEST-PRACTICES.md, GLOSSARY.md, WONT-DO.md, VISION.md, ROADMAP.md) - Parses persona-roster from docs/EXPERT-REGISTRY.md (canonical source per B-0162 acceptance criteria) - Plus extra personas (Otto, Amara, Ani, Sova, Rodney, Nazar, Ilyana), human-maintainer names (Aaron, Max), external-AI-instance names (Claude.ai, Codex, Gemini) - Detects attribution patterns (Name + date, Per Name, Name's said/grants/proposed/etc.) - Excludes inline-code spans + likely rule-references - Reports per-violation diagnostic with file/line/suggested-fix Soft-launch default per B-0162 acceptance criterion 9: ships as warning-only (exit 0 with warnings) so pre-existing violations across CLAUDE.md/VISION.md/AGENTS.md don't break CI on first land. --strict flag OR ROLE_REF_CHECK_SOFT_LAUNCH=0 env var promotes to error mode. Self-test: 26 violations found across 10 current-state surfaces (mostly pre-existing tech debt predating the role-ref convention that B-0162 codifies). 
Composes with: - docs/AGENT-BEST-PRACTICES.md Otto-279 carve-out - tools/hygiene/audit-orphan-role-refs.sh (post-strip lint) - tools/hygiene/check-archive-header-section33.sh (sibling pattern) Future work: - Wire into .github/workflows/gate.yml as warning-mode lint job - After soak period (no false positives observed for ~7 days), promote to --strict mode + cleanup pre-existing violations - Pre-commit hook integration (.husky/ or .git/hooks/pre-commit) Per first-principles trust calculus: 5-recurrence past mechanization-breakeven trace from B-0162's P2→P1 promotion checks out; the script earns its landing. Co-Authored-By: Claude Opus 4.7 --- ...heck-role-ref-on-current-state-surfaces.sh | 177 ++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100755 tools/hygiene/check-role-ref-on-current-state-surfaces.sh diff --git a/tools/hygiene/check-role-ref-on-current-state-surfaces.sh b/tools/hygiene/check-role-ref-on-current-state-surfaces.sh new file mode 100755 index 000000000..30895cfb7 --- /dev/null +++ b/tools/hygiene/check-role-ref-on-current-state-surfaces.sh @@ -0,0 +1,177 @@ +#!/usr/bin/env bash +# +# tools/hygiene/check-role-ref-on-current-state-surfaces.sh — +# validates that current-state surfaces (CLAUDE.md, AGENTS.md, +# GOVERNANCE.md, ALIGNMENT.md, etc.) use role-refs rather than +# direct name attribution per the Otto-279 carve-out documented at +# docs/AGENT-BEST-PRACTICES.md. +# +# Why this exists (B-0162; 5 catches on PR #1202 alone): +# The role-ref convention failure mode recurred 5 times in one +# session (H0Ro / H1ws / H3eE / H8A0+A5 / H9dy on PR #1202). +# Each catch cost ~5-10 minutes thread-resolution work. Total +# spent: ~25-50 min. Mechanization estimate: ~30-60 min. Past +# breakeven; pre-commit catch is faster than post-commit Copilot +# catch. 
#
# Per Otto-341 (mechanism over vigilance) + Otto-346 (recurring
# pattern → substrate primitive missing): the right shape is a
# CI lint that flags a current-state surface containing direct
# name attribution for known persona / human-maintainer / external-
# AI-instance names.
#
# What this checks:
#   For every file in the closed current-state-surface list:
#   - Scan for whole-word occurrences of <name>, where <name> is in
#     the known roster (parsed from docs/EXPERT-REGISTRY.md +
#     hardcoded persona / human-maintainer / external-AI names)
#   - Distinguish attribution ("Name 20NN-…", "Per Name",
#     "Name's <word>", "Name said/grants/proposed/…") from
#     rule-references ("Name-NN", e.g. Otto-279)
#   - Report violations with file:line and a suggested fix
#
# Exit status:
#   0  no violations, OR violations found while in soft-launch mode
#      (the default; warnings are still printed to stderr)
#   1  violations found in strict mode (--strict, or
#      ROLE_REF_CHECK_SOFT_LAUNCH set to anything other than 1)
#
# What this does NOT do:
#   - Does NOT scan history surfaces (memory/, docs/research/**,
#     docs/ROUND-HISTORY.md, docs/DECISIONS/**, docs/aurora/**,
#     docs/hygiene-history/**, commit messages)
#   - Does NOT auto-fix
#   - Does NOT enforce on lines that legitimately reference names
#     (rule references like "Otto-279", or lines where the name is
#     quoted in backticks)
#   - Does NOT skip the *contents* of fenced code blocks; only the
#     fence-opening line itself is ignored
#
# Composes with:
#   - docs/AGENT-BEST-PRACTICES.md (Otto-279 carve-out)
#   - tools/hygiene/audit-orphan-role-refs.sh (post-strip lint)
#   - tools/hygiene/check-archive-header-section33.sh (sibling pattern)
#   - .github/workflows/gate.yml (wired as a lint job — TODO)
#
# Self-test:
#   $ tools/hygiene/check-role-ref-on-current-state-surfaces.sh --strict
#   → exit 0 if all current-state surfaces are clean
#   → exit 1 with per-violation diagnostics if any direct name
#     attribution is found

set -euo pipefail

# Closed-list of current-state surfaces (per Otto-279 carve-out: any
# surface NOT in the history-surface allow-list).
readonly -a CURRENT_STATE_SURFACES=(
  "CLAUDE.md"
  "AGENTS.md"
  "GOVERNANCE.md"
  "docs/ALIGNMENT.md"
  "docs/CONFLICT-RESOLUTION.md"
  "docs/AGENT-BEST-PRACTICES.md"
  "docs/GLOSSARY.md"
  "docs/WONT-DO.md"
  "docs/VISION.md"
  "docs/ROADMAP.md"
)

# Persona names that don't appear in EXPERT-REGISTRY but are
# load-bearing in this project's substrate.
readonly -a EXTRA_PERSONAS=("Otto" "Amara" "Ani" "Sova" "Rodney" "Nazar" "Ilyana")

# Human-maintainer names (CURRENT-*.md filenames define this).
readonly -a HUMAN_NAMES=("Aaron" "Max")

# External-AI-instance names (when used as instance attribution,
# NOT as tool/SDK references).
readonly -a EXTERNAL_AI_NAMES=("Claude.ai" "Codex" "Gemini")

# Populated by build_name_list; running total maintained by report_violation.
ALL_NAMES=()
VIOLATIONS=0

#######################################
# Print the persona names found in an expert-registry table, one per line.
# Expected row shape: | **Role** | **Name** | ... |  (bold name in column 2).
# Arguments: $1 - path to the registry markdown file
# Outputs:   names on stdout; nothing if the file is absent or has no rows
# Returns:   0 always
#######################################
roster_names_from_registry() {
  local registry=$1
  [[ -f "$registry" ]] || return 0
  # `|| true`: grep exits 1 on "no rows", which must not abort the caller
  # under `set -e -o pipefail`.
  grep -E -- '^\| \*\*[A-Za-z][^|]*\*\* \| \*\*[A-Z][a-z]+\*\* \|' "$registry" \
    | sed -E 's/^\| \*\*[^|]+\*\* \| \*\*([A-Z][a-z]+)\*\* \|.*/\1/' \
    || true
}

#######################################
# Fill ALL_NAMES with registry names followed by the hardcoded lists,
# dropping empties and duplicates (first occurrence wins).
# Names are appended one at a time so that no empty array is ever expanded;
# bash < 4.4 treats "${empty[@]}" as an unbound variable under `set -u`.
# Globals:   ALL_NAMES (written); EXTRA_PERSONAS, HUMAN_NAMES,
#            EXTERNAL_AI_NAMES (read)
# Arguments: $1 - registry path (default: docs/EXPERT-REGISTRY.md)
# Returns:   0 always
#######################################
build_name_list() {
  local registry=${1:-docs/EXPERT-REGISTRY.md}
  local seen='|'
  local candidate
  ALL_NAMES=()
  while IFS= read -r candidate; do
    if [[ -z "$candidate" || "$seen" == *"|${candidate}|"* ]]; then
      continue
    fi
    seen+="${candidate}|"
    ALL_NAMES+=("$candidate")
  done < <(
    roster_names_from_registry "$registry"
    printf '%s\n' "${EXTRA_PERSONAS[@]}" "${HUMAN_NAMES[@]}" "${EXTERNAL_AI_NAMES[@]}"
  )
}

#######################################
# Build the POSIX ERE that recognises direct attribution to a name.
# Violations:
#   - "Name 20NN…"      date-stamped attribution (any 20xx year)
#   - "Per Name"        per-attribution (but not "Per Name-NN")
#   - "Name's <word>", "Name said/grants/proposed/…"
# Not matched (rule references): "Name-NN" such as Otto-279.
# Claude.ai keeps its own, broader suffix set (": " and any following
# lowercase word) and has its dot escaped.
# Arguments: $1 - name
# Outputs:   the regex on stdout
#######################################
build_attribution_pattern() {
  local name=$1
  local name_re=${name//./\\.}
  local suffixes
  if [[ "$name" == "Claude.ai" ]]; then
    suffixes=" 20[0-9]{2}|: |[ '][a-z]"
  else
    suffixes=" 20[0-9]{2}|'s [a-z]| said| grants| proposed| asked| corrected| confirmed| disclosed"
  fi
  # (^|[^[:alnum:]_]) is the portable spelling of a leading word boundary.
  printf '%s' "(^|[^[:alnum:]_])(${name_re}(${suffixes})|[Pp]er ${name_re}([^[:alnum:]_-]|\$))"
}

#######################################
# List attribution hits for one name in one file.
# Lines that open a ``` fence, or that contain the name quoted in
# backticks, are dropped.
# Arguments: $1 - name, $2 - file to scan
# Outputs:   grep -n style "LINENO:text" lines on stdout (possibly none)
# Returns:   0 always
#######################################
find_attributions() {
  local name=$1
  local surface=$2
  local pattern
  pattern=$(build_attribution_pattern "$name")
  grep -nE -- "$pattern" "$surface" 2>/dev/null \
    | grep -vE '^[0-9]+:[[:space:]]*```' \
    | grep -vF -- "\`${name}\`" \
    || true
}

#######################################
# Print one violation diagnostic to stderr and bump the counter.
# Globals:   VIOLATIONS (written)
# Arguments: $1 - file, $2 - name, $3 - "LINENO:text" hit from grep -n
#######################################
report_violation() {
  local surface=$1
  local name=$2
  local hit=$3
  local lineno=${hit%%:*}
  local text=${hit#*:}
  {
    echo "VIOLATION: ${surface}:${lineno}: direct name attribution '${name}' on current-state surface"
    echo "    ${text}"
    echo "  Fix: replace with role-ref (e.g., 'the human maintainer', 'the architect')"
    echo "       OR move to history surface (memory/, docs/research/**, etc.)"
  } >&2
  VIOLATIONS=$((VIOLATIONS + 1))
}

#######################################
# Entry point: scan every existing current-state surface for every name.
# Globals:   ROLE_REF_CHECK_SOFT_LAUNCH (read; default 1 = warn only)
# Arguments: optional --strict (any position) to fail on violations
# Returns:   0 if clean or in soft-launch mode, 1 on violations in strict mode
#######################################
main() {
  # Soft-launch: ship as warning-only first, then promote to error after
  # a soak period (per B-0162 acceptance criteria).
  local soft_launch="${ROLE_REF_CHECK_SOFT_LAUNCH:-1}"
  local arg
  for arg; do
    if [[ "$arg" == "--strict" ]]; then
      soft_launch=0
    fi
  done

  local repo_root
  repo_root="$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
  cd "$repo_root"

  build_name_list "docs/EXPERT-REGISTRY.md"

  local surface name hits hit
  local scanned=0
  VIOLATIONS=0
  for surface in "${CURRENT_STATE_SURFACES[@]}"; do
    [[ -f "$surface" ]] || continue
    scanned=$((scanned + 1))
    for name in "${ALL_NAMES[@]}"; do
      hits=$(find_attributions "$name" "$surface")
      [[ -n "$hits" ]] || continue
      while IFS= read -r hit; do
        report_violation "$surface" "$name" "$hit"
      done <<< "$hits"
    done
  done

  echo "" >&2
  # Count only the surfaces that actually exist and were scanned.
  echo "checked ${scanned} current-state surfaces; ${VIOLATIONS} violations" >&2

  if [[ $VIOLATIONS -eq 0 ]]; then
    return 0
  fi

  {
    echo ""
    echo "Per docs/AGENT-BEST-PRACTICES.md Otto-279 carve-out:"
    echo "  current-state surfaces use role-refs ('the maintainer', 'the architect')"
    echo "  persona / human / external-AI names are reserved for history surfaces"
    echo "  (memory/**, docs/research/**, docs/ROUND-HISTORY.md, docs/DECISIONS/**,"
    echo "  docs/aurora/**, docs/hygiene-history/**, commit messages)"
  } >&2

  if [[ "$soft_launch" == "1" ]]; then
    {
      echo ""
      echo "[SOFT-LAUNCH MODE: exit 0 despite violations. Set --strict OR"
      echo " ROLE_REF_CHECK_SOFT_LAUNCH=0 to enforce.]"
    } >&2
    return 0
  fi
  return 1
}

# Must stay the last statement: its status is the script's exit status.
main "$@"