From d68e58084ea721b6e2ceb47096953ecb3e8c7ed7 Mon Sep 17 00:00:00 2001
From: Aaron Stainback <aaron_bond@yahoo.com>
Date: Sat, 2 May 2026 12:53:40 -0400
Subject: [PATCH] =?UTF-8?q?tools(hygiene):=20B-0162=20=E2=80=94=20role-ref?=
 =?UTF-8?q?=20check=20for=20current-state=20surfaces=20(soft-launch=20by?=
 =?UTF-8?q?=20default)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements the pre-commit hook from B-0162 (P1 backlog row;
5 catches on PR #1202 alone past mechanization breakeven).

Script: tools/hygiene/check-role-ref-on-current-state-surfaces.sh

Behavior:
- Scans closed-list current-state surfaces (CLAUDE.md, AGENTS.md,
  GOVERNANCE.md, ALIGNMENT.md, CONFLICT-RESOLUTION.md, AGENT-BEST-
  PRACTICES.md, GLOSSARY.md, WONT-DO.md, VISION.md, ROADMAP.md)
- Parses persona-roster from docs/EXPERT-REGISTRY.md (canonical
  source per B-0162 acceptance criteria)
- Plus extra personas (Otto, Amara, Ani, Sova, Rodney, Nazar,
  Ilyana), human-maintainer names (Aaron, Max), external-AI-
  instance names (Claude.ai, Codex, Gemini)
- Detects attribution patterns (Name + date, Per Name, Name's
  said/grants/proposed/etc.)
- Excludes inline-code spans + likely rule-references
- Reports per-violation diagnostic with file/line/suggested-fix

Soft-launch default per B-0162 acceptance criterion 9: ships
as warning-only (exit 0 with warnings) so existing pre-existing
violations across CLAUDE.md/VISION.md/AGENTS.md don't break CI
on first land. --strict flag OR ROLE_REF_CHECK_SOFT_LAUNCH=0
env var promotes to error mode.

Self-test: 26 violations found across 10 current-state surfaces
(mostly pre-existing tech debt predating the role-ref convention
that B-0162 codifies).

Composes with:
- docs/AGENT-BEST-PRACTICES.md Otto-279 carve-out
- tools/hygiene/audit-orphan-role-refs.sh (post-strip lint)
- tools/hygiene/check-archive-header-section33.sh (sibling pattern)

Future work:
- Wire into .github/workflows/gate.yml as warning-mode lint job
- After soak period (no false positives observed for ~7 days),
  promote to --strict mode + cleanup pre-existing violations
- Pre-commit hook integration (.husky/ or .git/hooks/pre-commit)

Per first-principles trust calculus: 5-recurrence past
mechanization-breakeven trace from B-0162's P2→P1 promotion
checks out; the script earns its landing.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 ...heck-role-ref-on-current-state-surfaces.sh | 177 ++++++++++++++++++
 1 file changed, 177 insertions(+)
 create mode 100755 tools/hygiene/check-role-ref-on-current-state-surfaces.sh
diff --git a/tools/hygiene/check-role-ref-on-current-state-surfaces.sh b/tools/hygiene/check-role-ref-on-current-state-surfaces.sh
new file mode 100755
index 000000000..30895cfb7
--- /dev/null
+++ b/tools/hygiene/check-role-ref-on-current-state-surfaces.sh
@@ -0,0 +1,177 @@
+#!/usr/bin/env bash
+#
+# tools/hygiene/check-role-ref-on-current-state-surfaces.sh —
+# validates that current-state surfaces (CLAUDE.md, AGENTS.md,
+# GOVERNANCE.md, ALIGNMENT.md, etc.) use role-refs rather than
+# direct name attribution per the Otto-279 carve-out documented at
+# docs/AGENT-BEST-PRACTICES.md.
+#
+# Why this exists (B-0162; 5 catches on PR #1202 alone):
+#   The role-ref convention failure mode recurred 5 times in one
+#   session (H0Ro / H1ws / H3eE / H8A0+A5 / H9dy on PR #1202).
+#   Each catch cost ~5-10 minutes thread-resolution work. Total
+#   spent: ~25-50 min. Mechanization estimate: ~30-60 min. Past
+#   breakeven; pre-commit catch is faster than post-commit Copilot
+#   catch.
+#
+#   Per Otto-341 (mechanism over vigilance) + Otto-346 (recurring
+#   pattern → substrate primitive missing): the right shape is a
+#   CI lint that fails when a current-state surface contains direct
+#   name attribution for known persona / human-maintainer / external-
+#   AI-instance names.
+#
+# What this checks:
+#   For every file in the closed current-state-surface list:
+#   - Scan for `\b<Name>\b` patterns where <Name> is in the
+#     known-roster (parsed from docs/EXPERT-REGISTRY.md +
+#     hardcoded human-maintainer / external-AI names)
+#   - Distinguish attribution (Name + date OR Name + ' grants ' / 'said'
+#     etc.) from rule-references (Name-NN, Name's <noun-phrase>)
+#   - Report violations with file/line/suggested-fix
+#   - Exit non-zero on any violation
+#
+# What this does NOT do:
+#   - Does NOT scan history surfaces (memory/, docs/research/**,
+#     docs/ROUND-HISTORY.md, docs/DECISIONS/**, docs/aurora/**,
+#     docs/hygiene-history/**, commit messages)
+#   - Does NOT auto-fix
+#   - Does NOT enforce on lines that legitimately reference names
+#     (rule references like "Otto-279" or quoted material)
+#
+# Composes with:
+#   - docs/AGENT-BEST-PRACTICES.md (Otto-279 carve-out)
+#   - tools/hygiene/audit-orphan-role-refs.sh (post-strip lint)
+#   - tools/hygiene/check-archive-header-section33.sh (sibling pattern)
+#   - .github/workflows/gate.yml (wired as a lint job — TODO)
+#
+# Self-test:
+#   $ tools/hygiene/check-role-ref-on-current-state-surfaces.sh
+#     → exit 0 if all current-state surfaces are clean
+#     → exit 1 with per-file diagnostic if any direct name
+#       attribution is found
+
+set -euo pipefail
+
+# Soft-launch flag: when set, exit 0 even on violations (warnings printed
+# but build doesn't fail). Per B-0162 acceptance criteria:
+#   "Soft-launch: ship as warning-only first, then promote to error after
+#   a soak period (existing pattern per check-tick-history-shard-schema.sh)."
+SOFT_LAUNCH="${ROLE_REF_CHECK_SOFT_LAUNCH:-1}"
+
+# CLI flag override
+if [[ "${1:-}" == "--strict" ]]; then
+  SOFT_LAUNCH=0
+fi
+
+REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
+cd "$REPO_ROOT"
+
+# Closed-list of current-state surfaces (per Otto-279 carve-out: any
+# surface NOT in the history-surface allow-list)
+CURRENT_STATE_SURFACES=(
+  "CLAUDE.md"
+  "AGENTS.md"
+  "GOVERNANCE.md"
+  "docs/ALIGNMENT.md"
+  "docs/CONFLICT-RESOLUTION.md"
+  "docs/AGENT-BEST-PRACTICES.md"
+  "docs/GLOSSARY.md"
+  "docs/WONT-DO.md"
+  "docs/VISION.md"
+  "docs/ROADMAP.md"
+)
+
+# Parse persona-roster from docs/EXPERT-REGISTRY.md
+# Format expected: lines containing | **Role** | **Name** | ... |
+# Extract the bolded name in column 2.
+ROSTER_NAMES=()
+if [[ -f "docs/EXPERT-REGISTRY.md" ]]; then
+  while IFS= read -r name; do
+    [[ -n "$name" ]] && ROSTER_NAMES+=("$name")
+  done < <(
+    grep -E '^\| \*\*[A-Za-z][^|]*\*\* \| \*\*[A-Z][a-z]+\*\* \|' docs/EXPERT-REGISTRY.md \
+      | sed -E 's/^\| \*\*[^|]+\*\* \| \*\*([A-Z][a-z]+)\*\* \|.*/\1/'
+  )
+fi
+
+# Plus persona names that don't appear in EXPERT-REGISTRY but are
+# load-bearing in this project's substrate
+EXTRA_PERSONAS=("Otto" "Amara" "Ani" "Sova" "Rodney" "Nazar" "Ilyana")
+
+# Plus human-maintainer names (CURRENT-*.md filenames define this)
+HUMAN_NAMES=("Aaron" "Max")
+
+# Plus external-AI-instance names (when used as instance attribution,
+# NOT as tool/SDK references)
+EXTERNAL_AI_NAMES=("Claude.ai" "Codex" "Gemini")
+
+ALL_NAMES=("${ROSTER_NAMES[@]}" "${EXTRA_PERSONAS[@]}" "${HUMAN_NAMES[@]}" "${EXTERNAL_AI_NAMES[@]}")
+
+VIOLATIONS=0
+
+for surface in "${CURRENT_STATE_SURFACES[@]}"; do
+  [[ ! -f "$surface" ]] && continue
+
+  for name in "${ALL_NAMES[@]}"; do
+    # Skip empty names
+    [[ -z "$name" ]] && continue
+
+    # Pattern: word-boundary name followed by attribution context.
+    # Attribution-context patterns that indicate violation:
+    #   - "Name YYYY-MM-DD" (date-stamped attribution)
+    #   - "Per Name" (per-attribution)
+    #   - "Name's <verb>" (Name's said/grants/proposed/etc.)
+    #
+    # Exclude (rule references, NOT violations):
+    #   - "Name-NN" (Otto-279, Otto-340 — rule numbers)
+    #   - Inline code: `Name` in backticks
+    #
+    # The grep pattern below captures attribution forms.
+    # For Claude.ai we need a slightly different boundary handling
+    # because of the dot.
+
+    if [[ "$name" == "Claude.ai" ]]; then
+      pattern="\\bClaude\\.ai( 2026|: |[ '][a-z])"
+    else
+      pattern="\\b${name}( 2026| 2027|'s [a-z]| said| grants| proposed| asked| corrected| confirmed| disclosed)"
+    fi
+
+    # grep for the pattern, exclude inline-code lines (start with ` or contain `<name>`)
+    matches=$(grep -nE "$pattern" "$surface" 2>/dev/null \
+      | grep -v -E '^[0-9]+:\s*```' \
+      | grep -v -F "\`${name}\`" \
+      || true)
+
+    if [[ -n "$matches" ]]; then
+      while IFS= read -r line; do
+        # Extract line number for clearer reporting
+        echo "VIOLATION: ${surface}: direct name attribution '${name}' on current-state surface" >&2
+        echo "  ${line}" >&2
+        echo "  Fix: replace with role-ref (e.g., 'the human maintainer', 'the architect')" >&2
+        echo "       OR move to history surface (memory/, docs/research/**, etc.)" >&2
+        VIOLATIONS=$((VIOLATIONS + 1))
+      done <<< "$matches"
+    fi
+  done
+done
+
+echo "" >&2
+echo "checked ${#CURRENT_STATE_SURFACES[@]} current-state surfaces; ${VIOLATIONS} violations" >&2
+
+if [[ $VIOLATIONS -gt 0 ]]; then
+  echo "" >&2
+  echo "Per docs/AGENT-BEST-PRACTICES.md Otto-279 carve-out:" >&2
+  echo "  current-state surfaces use role-refs ('the maintainer', 'the architect')" >&2
+  echo "  persona / human / external-AI names are reserved for history surfaces" >&2
+  echo "  (memory/**, docs/research/**, docs/ROUND-HISTORY.md, docs/DECISIONS/**," >&2
+  echo "   docs/aurora/**, docs/hygiene-history/**, commit messages)" >&2
+  if [[ "$SOFT_LAUNCH" == "1" ]]; then
+    echo "" >&2
+    echo "[SOFT-LAUNCH MODE: exit 0 despite violations. Set --strict OR" >&2
+    echo " ROLE_REF_CHECK_SOFT_LAUNCH=0 to enforce.]" >&2
+    exit 0
+  fi
+  exit 1
+fi
+
+exit 0