Lucent-Financial-Group · AceHack · Apr 24, 2026 · Apr 24, 2026 · Apr 24, 2026
diff --git a/tools/lint/doc-comment-history-audit.baseline b/tools/lint/doc-comment-history-audit.baseline
@@ -0,0 +1,82 @@
+src/Core/Graph.fs:122:graduation
+src/Core/Graph.fs:13:Otto-123
+src/Core/Graph.fs:19:Aaron,Otto-121
+src/Core/Graph.fs:197:ferry
+src/Core/Graph.fs:198:ferry
+src/Core/Graph.fs:20:Amara
+src/Core/Graph.fs:206:graduation
+src/Core/Graph.fs:208:Aaron,Amara,Provenance:
+src/Core/Graph.fs:209:ferry
+src/Core/Graph.fs:21:Otto-122
+src/Core/Graph.fs:210:graduation
+src/Core/Graph.fs:23:graduation,Otto-105
+src/Core/Graph.fs:24:ferry
+src/Core/Graph.fs:27:graduation
+src/Core/Graph.fs:32:Otto-105
+src/Core/Graph.fs:321:Provenance:
+src/Core/Graph.fs:33:graduation
+src/Core/Graph.fs:383:graduation
+src/Core/Graph.fs:386:ferry,Provenance:
+src/Core/Graph.fs:387:ferry
+src/Core/Graph.fs:40:graduation
+src/Core/Graph.fs:482:Amara,Otto-132
+src/Core/Graph.fs:485:Amara,ferry
+src/Core/Graph.fs:489:graduation
+src/Core/Graph.fs:492:Amara,ferry
+src/Core/Graph.fs:498:ferry,Provenance:
+src/Core/Graph.fs:499:graduation
+src/Core/Graph.fs:533:Amara
+src/Core/Graph.fs:534:ferry
+src/Core/Graph.fs:558:Amara,ferry
+src/Core/Graph.fs:563:Provenance:
+src/Core/Graph.fs:564:courier,ferry
+src/Core/Graph.fs:566:graduation
+src/Core/Graph.fs:567:Otto-105
+src/Core/PhaseExtraction.fs:11:Amara,ferry
+src/Core/PhaseExtraction.fs:32:Amara,ferry,Provenance:
+src/Core/PhaseExtraction.fs:34:graduation
+src/Core/RobustStats.fs:10:Amara,ferry,graduation
+src/Core/RobustStats.fs:11:Otto-105
+src/Core/RobustStats.fs:130:Amara,ferry,Provenance:
+src/Core/RobustStats.fs:31:Amara
+src/Core/RobustStats.fs:37:graduation
+src/Core/RobustStats.fs:43:Amara,ferry
+src/Core/RobustStats.fs:78:Amara,ferry
+src/Core/RobustStats.fs:8:Amara
+src/Core/RobustStats.fs:80:ferry
+src/Core/RobustStats.fs:9:courier,ferry
+src/Core/Veridicality.fs:118:Amara,ferry
+src/Core/Veridicality.fs:130:Amara,ferry
+src/Core/Veridicality.fs:17:graduation
+src/Core/Veridicality.fs:21:Amara,ferry
+src/Core/Veridicality.fs:22:ferry,graduation
+src/Core/Veridicality.fs:27:Aaron
+src/Core/Veridicality.fs:29:Aaron,Amara,Otto-112
+src/Core/Veridicality.fs:31:ferry
+src/Core/Veridicality.fs:32:Amara
+src/Core/Veridicality.fs:35:graduation,Otto-105
+src/Core/Veridicality.fs:36:graduation
+src/Core/Veridicality.fs:53:Amara,ferry
+src/Core/Veridicality.fs:55:ferry
+src/Core/Veridicality.fs:9:Amara
+src/Core/Veridicality.fs:95:Amara,ferry
+tests/Tests.FSharp/Algebra/TemporalCoordinationDetection.Tests.fs:209:ferry
+tests/Tests.FSharp/Operators/RecursiveSemiNaive.Boundary.Tests.fs:22:Amara,courier
+tools/hygiene/audit-cross-platform-parity.sh:18:Aaron
+tools/hygiene/audit-cross-platform-parity.sh:32:Aaron
+tools/hygiene/audit-cross-platform-parity.sh:48:Aaron
+tools/hygiene/audit-machine-specific-content.sh:12:Aaron,Otto-27
+tools/hygiene/audit-memory-references.sh:19:Aaron
+tools/hygiene/audit-memory-references.sh:6:Amara,ferry
+tools/hygiene/audit-tick-history-bounded-growth.sh:14:Aaron
+tools/hygiene/capture-tick-snapshot.sh:12:Amara,ferry
+tools/hygiene/capture-tick-snapshot.sh:41:Amara
+tools/lint/doc-comment-history-audit.sh:77:Attribution:,Provenance:
+tools/lint/no-empty-dirs.sh:5:Aaron
+tools/setup/common/profile-edit.sh:34:Aaron
+tools/setup/common/sync-upstreams.sh:8:Aaron
+tools/setup/common/verifiers.sh:38:Aaron
+tools/setup/common/verifiers.sh:7:Aaron
+tools/setup/doctor.sh:11:Aaron
+tools/setup/doctor.sh:79:Aaron
+tools/setup/macos.sh:30:Aaron
diff --git a/tools/lint/doc-comment-history-audit.sh b/tools/lint/doc-comment-history-audit.sh
@@ -0,0 +1,227 @@
+#!/usr/bin/env bash
+#
+# tools/lint/doc-comment-history-audit.sh — scan source doc comments
+# for factory-process tokens that belong in PR descriptions, history
+# files, or round-notes rather than in code.
+#
+# The rule: a code-file comment (`///`, `//`, `#`) should explain
+# what the code DOES — math, invariants, input contracts,
+# composition guidance. It should not carry process-lineage tags
+# (which round shipped it, which external collaborator formalised
+# it, which correction number motivated a tweak, which persona
+# takes credit). That content belongs in the PR description, the
+# commit message, `docs/hygiene-history/**`, or memory files.
+#
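+# Scope (every root is searched for every extension listed):
+#   - roots: src/, tests/, bench/, tools/
+#   - extensions: *.fs, *.cs, *.sh, *.ts
+#   - skipped at any depth: bin/, obj/, .venv/, node_modules/
+#   - the `///`, `//`, `#` comment markers apply to every file type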
+#
+# NOT scanned (these legitimately carry history):
+#   - docs/hygiene-history/**, docs/DECISIONS/**, docs/ROUND-HISTORY.md
+#   - openspec/** (spec files — history is part of the spec)
+#   - memory/** (memory is by design historical)
+#   - .git/, bin/, obj/, vendored mirrors
+#
+# Flagged tokens are defined in TOKEN_PATTERN below. Each token is
+# chosen for high signal + low false-positive rate: factory-process
+# terms (round tags, personas by name, cadence jargon) and
+# attribution-paragraph headers. If a token produces false
+# positives in legitimate code, tighten the regex rather than
+# allowlisting the file.
+#
+# Only scans COMMENT LINES (lines whose first non-whitespace is one
+# of `///`, `//`, `#`). Prose matching in code bodies is not a
+# concern — if a flagged token appears in a string literal or
+# variable name that's a separate conversation.
+#
+# Usage:
+#   tools/lint/doc-comment-history-audit.sh
+#                         # audit mode: print violations, exit 1 if
+#                         # any violation is NOT in the baseline
+#   tools/lint/doc-comment-history-audit.sh --list
+#                         # print every violation file:line:token,
+#                         # exit 0 regardless of baseline
+#   tools/lint/doc-comment-history-audit.sh --fail-any
+#                         # strict mode: exit 1 on ANY violation
+#                         # (for post-cleanup use once baseline is
+#                         # empty)
+#   tools/lint/doc-comment-history-audit.sh --regenerate-baseline
+#                         # overwrite the baseline with current
+#                         # state; use only when a PR legitimately
+#                         # shuffles allowlisted lines
+#
+# Baseline: tools/lint/doc-comment-history-audit.baseline — one
+# entry per line in `file:line:token` form. Represents violations
+# that exist TODAY; the lint fails only on violations that don't
+# appear there, so existing debt doesn't block commits while
+# cleanup PRs drain it.
+
+set -euo pipefail  # abort on errors, unset variables, and failed pipeline stages
+
+# Repo root = two levels above this script. CDPATH is cleared so `cd` cannot echo a path into the substitution.
+REPO_ROOT="$(CDPATH= cd -- "$(dirname -- "$0")/../.." && pwd)"
+cd -- "$REPO_ROOT"  # every path used below is relative to the repo root
+BASELINE_FILE="tools/lint/doc-comment-history-audit.baseline"  # allowlist of known violations
+MODE="${1:-check}"  # first argument selects the mode; defaults to check
+
+# ---- Token list --------------------------------------------------------------
+# ERE alternation handed to awk as a dynamic regex. Word boundaries
+# are checked in awk code (portable across GNU awk and BSD awk), not
+# via `\b`, which is not portable in ERE (`grep -E`) — BSD grep treats
+# `\b` as a literal `b`, silently missing matches on macOS. The awk
+# loop in `collect_violations` keeps a match only when the characters
+# on both sides are non-word (`[^A-Za-z0-9_]`) or the line edge.
+# The one exception: tokens ending in `:` (e.g.
+# `Provenance:`, `Attribution:`) do not need a trailing boundary.
+TOKEN_PATTERN='(Otto-[0-9]+|Amara|Aaron|ferry|courier|graduation|Provenance:|Attribution:)'
+
+# ---- Files to scan -----------------------------------------------------------
+scan_files() {
+  # Use find with explicit includes; exclude vendored / build / spec
+  # / history trees where factory-history tokens are legitimate.
+  find src tests bench tools \
+    \( -name '*.fs' -o -name '*.cs' -o -name '*.sh' -o -name '*.ts' \) \
+    -not -path '*/bin/*' \
+    -not -path '*/obj/*' \
+    -not -path '*/.venv/*' \
+    -not -path '*/node_modules/*' \
+    -type f \
+    2>/dev/null
+}
+
+# ---- Violation extraction ----------------------------------------------------
+# For each file: extract comment lines only (leading `///`, `//`, `#`),
+# match tokens from TOKEN_PATTERN with explicit word-boundary checks
+# (for portability between GNU awk and BSD awk — `\b` is not portable
+# in POSIX ERE), and emit `file:line:token1,token2,...` tuples where
+# the token list is sorted + deduplicated. Emitting every token per
+# line (not just the first) ensures the baseline comparison in
+# default mode catches the case where a baselined line gains a new
+# forbidden token — the record changes, and the new record is flagged.
+collect_violations() {
+  # Runs one awk pass per path printed by scan_files and writes one
+  # `file:line:token1,token2,...` record to stdout for each comment
+  # line that holds at least one flagged token. Tokens within a record
+  # are unique and sorted.
+  local path
+  while IFS= read -r path; do
+    awk -v pat="$TOKEN_PATTERN" -v fname="$path" '
+      # True when the first non-blank text is `//` (this also covers
+      # `///`) or a `#` that does not open a shebang.
+      function is_comment_line(text) {
+        if (text ~ /^[[:space:]]*\/\//) return 1
+        return (text ~ /^[[:space:]]*#/ && text !~ /^[[:space:]]*#!/)
+      }
+      # True when position idx lies outside text or holds a character
+      # other than [A-Za-z0-9_].
+      function is_boundary(text, idx) {
+        if (idx < 1 || idx > length(text)) return 1
+        return (substr(text, idx, 1) !~ /[A-Za-z0-9_]/)
+      }
+      is_comment_line($0) {
+        delete seen
+        count = 0
+        tail = $0
+        consumed = 0
+        while (match(tail, pat)) {
+          hit = substr(tail, RSTART, RLENGTH)
+          first = consumed + RSTART
+          last = first + RLENGTH - 1
+          # Step past this match up front so the guard clauses below
+          # can simply skip to the next one.
+          consumed = last
+          tail = substr(tail, RSTART + RLENGTH)
+          if (hit in seen) continue
+          if (!is_boundary($0, first - 1)) continue
+          # A token that ends in ":" needs no trailing boundary.
+          if (hit !~ /:$/ && !is_boundary($0, last + 1)) continue
+          seen[hit] = 1
+          found[++count] = hit
+        }
+        if (count == 0) next
+        # Order the handful of tokens by sinking each one into place
+        # with adjacent swaps.
+        for (i = 2; i <= count; i++) {
+          for (j = i; j > 1 && found[j-1] > found[j]; j--) {
+            swap = found[j]
+            found[j] = found[j-1]
+            found[j-1] = swap
+          }
+        }
+        record = found[1]
+        for (i = 2; i <= count; i++) record = record "," found[i]
+        printf "%s:%d:%s\n", fname, NR, record
+        delete found
+      }
+    ' "$path"
+  done < <(scan_files)
+}
+
+# ---- Modes -------------------------------------------------------------------
+case "$MODE" in
+  --list)
+    collect_violations | sort
+    exit 0
+    ;;
+  --fail-any)
+    violations=$(collect_violations | sort)
+    if [ -n "$violations" ]; then
+      echo "doc-comment-history-audit: violations found (strict mode):" >&2
+      printf '%s\n' "$violations" >&2
+      count=$(printf '%s\n' "$violations" | wc -l | tr -d ' ')
+      echo "doc-comment-history-audit: $count violation(s); see" >&2
+      echo "  memory/feedback_code_comments_explain_code_not_history_otto_220_2026_04_24.md" >&2
+      exit 1
+    fi
+    echo "doc-comment-history-audit: no violations (strict mode clean)"
+    exit 0
+    ;;
+  --regenerate-baseline)
+    collect_violations | sort > "$BASELINE_FILE"
+    count=$(wc -l < "$BASELINE_FILE" | tr -d ' ')
+    echo "doc-comment-history-audit: baseline regenerated with $count entries" >&2
+    echo "  -> $BASELINE_FILE" >&2
+    exit 0
+    ;;
+  check|'')
+    # Default mode: fail on violations not in baseline.
+    if [ ! -f "$BASELINE_FILE" ]; then
+      echo "doc-comment-history-audit: baseline missing at $BASELINE_FILE" >&2
+      echo "  regenerate with: $0 --regenerate-baseline" >&2
+      exit 2
+    fi
+    current=$(collect_violations | sort)
+    # New violations = current minus baseline.
+    new_violations=$(comm -23 <(printf '%s\n' "$current") <(sort "$BASELINE_FILE"))
+    if [ -n "$new_violations" ]; then
+      echo "doc-comment-history-audit: new violations not in baseline:" >&2
+      printf '%s\n' "$new_violations" >&2
+      count=$(printf '%s\n' "$new_violations" | wc -l | tr -d ' ')
+      echo "doc-comment-history-audit: $count new violation(s); see" >&2
+      echo "  memory/feedback_code_comments_explain_code_not_history_otto_220_2026_04_24.md" >&2
+      echo "  to legitimize a moved line, run: $0 --regenerate-baseline" >&2
+      exit 1
+    fi
+    baseline_count=$(wc -l < "$BASELINE_FILE" | tr -d ' ')
+    echo "doc-comment-history-audit: no new violations ($baseline_count entries in baseline)"
+    exit 0
+    ;;
+  *)
+    echo "doc-comment-history-audit: unknown mode '$MODE'" >&2
+    echo "usage: $0 [--list|--fail-any|--regenerate-baseline]" >&2
+    exit 2
+    ;;
+esac