diff --git a/.claude/settings.json b/.claude/settings.json
index 2b7f98a7..36c4619f 100644
--- a/.claude/settings.json
+++ b/.claude/settings.json
@@ -6,7 +6,16 @@
         "hooks": [
           {
             "type": "command",
-            "command": "python3 -c \"import json,sys,re; d=json.load(sys.stdin); c=d.get('tool_input',{}).get('command',''); ops=('git commit','git merge ','git pull','git rebase --continue'); [print('bicameral: git write-op detected — call bicameral.link_commit(commit_hash=\\'HEAD\\') now to sync the decision ledger') for _ in [1] if any(op in c for op in ops)]\""
+            "command": "python3 scripts/hooks/post_commit_sync_reminder.py"
+          }
+        ]
+      },
+      {
+        "matcher": "mcp__bicameral__bicameral_preflight",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "python3 scripts/hooks/post_preflight_capture_reminder.py"
           }
         ]
       }
@@ -16,7 +25,17 @@
         "hooks": [
           {
             "type": "command",
-            "command": "[ -d .bicameral ] && claude -p '/bicameral:capture-corrections' || true"
+            "command": "[ -d .bicameral ] && [ -z \"$BICAMERAL_SESSION_END_RUNNING\" ] && BICAMERAL_SESSION_END_RUNNING=1 claude -p '/bicameral:capture-corrections --auto-ingest' || true"
+          }
+        ]
+      }
+    ],
+    "UserPromptSubmit": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "python3 scripts/hooks/preflight_reminder.py"
           }
         ]
       }
diff --git a/.github/workflows/label-merged-to-dev.yml b/.github/workflows/label-merged-to-dev.yml
new file mode 100644
index 00000000..c97c99a8
--- /dev/null
+++ b/.github/workflows/label-merged-to-dev.yml
@@ -0,0 +1,70 @@
+name: Apply merged-to-dev label
+
+on:
+  pull_request:
+    branches: [dev]
+    types: [closed]
+
+jobs:
+  label:
+    name: Label closed-by-PR issues
+    runs-on: ubuntu-latest
+    if: github.event.pull_request.merged == true
+    permissions:
+      issues: write
+      pull-requests: read
+    steps:
+      - name: Apply merged-to-dev label
+        uses: actions/github-script@v7
+        with:
+          script: |
+            // Workflow caveat: this job needs Settings -> Actions ->
+            // General -> Workflow permissions set to "Read and write
+            // permissions" at the repo level. The job-level
+            // `permissions: issues: write` block can only NARROW what
+            // the repo allows, never expand it. If the repo default
+            // is read-only, addLabels returns 403 "Resource not
+            // accessible by integration" regardless of the job-level
+            // grant. See #115 (root cause + symptoms) and #104
+            // (admin-side fix tracked alongside branch-protection setup).
+            //
+            // GitHub close keywords (case-insensitive): close, closes,
+            // closed, fix, fixes, fixed, resolve, resolves, resolved,
+            // each optionally followed by a colon ("Closes: #12"). The
+            // leading \b stops words like "prefix #12" from matching.
+            const pr = context.payload.pull_request;
+            const body = pr.body || "";
+            const closeRegex = /\b(?:close[sd]?|fix(?:es|ed)?|resolve[sd]?):?\s+#(\d+)/gi;
+            const matches = [...body.matchAll(closeRegex)];
+            const issues = [...new Set(matches.map(m => parseInt(m[1], 10)))];
+
+            const failed = [];
+            for (const num of issues) {
+              try {
+                await github.rest.issues.addLabels({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: num,
+                  labels: ["merged-to-dev"]
+                });
+                console.log(`Labeled #${num}`);
+              } catch (e) {
+                console.log(`Failed to label #${num}: ${e.message}`);
+                failed.push({ num, message: e.message });
+              }
+            }
+
+            if (failed.length > 0) {
+              const summary = failed
+                .map(f => `  - #${f.num}: ${f.message}`)
+                .join("\n");
+              throw new Error(
+                `merged-to-dev labeller could not label ${failed.length} ` +
+                `issue(s) referenced by PR #${pr.number}:\n${summary}\n\n` +
+                `Most likely cause: repo Settings -> Actions -> General -> ` +
+                `Workflow permissions is set to read-only. ` +
+                `Job-level "permissions: issues: write" cannot expand a ` +
+                `read-only repo default. See #104 (admin fix) and ` +
+                `#115 (root cause).`
+              );
+            }
diff --git a/.github/workflows/lint-and-typecheck.yml b/.github/workflows/lint-and-typecheck.yml
new file mode 100644
index 00000000..a8f8bd5d
--- /dev/null
+++ b/.github/workflows/lint-and-typecheck.yml
@@ -0,0 +1,24 @@
+name: Lint & Type Check
+
+on:
+  pull_request:
+    branches: [main, dev]
+
+jobs:
+  lint:
+    name: ruff + mypy
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+      - name: Install
+        run: pip install -e ".[test]"
+      - name: Ruff check
+        run: ruff check .
+      - name: Ruff format check
+        run: ruff format --check .
+      - name: Mypy
+        run: mypy .
diff --git a/.github/workflows/secret-scan.yml b/.github/workflows/secret-scan.yml
new file mode 100644
index 00000000..7a04f54f
--- /dev/null
+++ b/.github/workflows/secret-scan.yml
@@ -0,0 +1,24 @@
+name: Secret Scan
+
+on:
+  pull_request:
+    branches: [main, dev]
+
+# gitleaks-action@v2 requires a paid license for organizations
+# (https://github.com/gitleaks/gitleaks-action#-announcement).
+# We use trufflehog instead — free for all repos, equally capable
+# detector ruleset, and faster cold-start than spinning up a
+# gitleaks container.
+jobs:
+  trufflehog:
+    name: TruffleHog
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history so trufflehog can scan the diff range
+      - uses: trufflesecurity/trufflehog@main  # NOTE(review): moving ref — consider pinning to a release tag or commit SHA
+        with:
+          base: ${{ github.event.pull_request.base.sha }}  # scan range start: the PR's base commit
+          head: ${{ github.event.pull_request.head.sha }}  # scan range end: the PR's head commit
+          extra_args: --only-verified  # presumably limits findings to verified secrets — confirm against trufflehog docs
diff --git a/.github/workflows/test-mcp-regression.yml b/.github/workflows/test-mcp-regression.yml
index 4336950e..fdcacc0e 100644
--- a/.github/workflows/test-mcp-regression.yml
+++ b/.github/workflows/test-mcp-regression.yml
@@ -2,7 +2,7 @@ name: MCP Regression Tests
 
 on:
   pull_request:
-    branches: [main]
+    branches: [main, dev]
 
 env:
   PYTHON_VERSION: '3.11'
@@ -10,7 +10,12 @@ env:
 jobs:
   mcp-tests:
     name: MCP Regression Suite
-    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, windows-latest]
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 20
     # Needed so ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} / ANTHROPIC_API_KEY
     # (environment secrets scoped to `ci-test`) is injected into the M1
     # extraction step. The env is gate-free so this does not block
@@ -47,7 +52,10 @@ jobs:
       # ── Clone OSS repos for eval ground truth ────────────────────────
       # Only medusa is needed — saleor/vendure were used by eval_code_locator.py
       # which was removed in v0.6.4 when search_code was nuked.
+      # Ubuntu-only: bash function syntax + medusa corpus consumed by
+      # the Linux-only M1 adversarial eval and E2E report below.
       - name: Clone eval repos (shallow, pinned commits)
+        if: matrix.os == 'ubuntu-latest'
         run: |
           clone_at_commit() {
             local repo_url=$1 dest=$2 commit=$3
@@ -80,6 +88,7 @@ jobs:
       # "secret is not set" from "secret is set to empty string" from
       # "secret is set correctly" without ever exposing the key.
       - name: M1 secret visibility probe
+        if: matrix.os == 'ubuntu-latest'
         run: |
           set +e
           if [ -n "${ANTHROPIC_API_KEY}" ]; then
@@ -109,6 +118,7 @@ jobs:
       # as a red "M1 adversarial" step in the job without failing the
       # whole build, so the rest of the regression suite still reports.
       - name: M1 adversarial corpus eval (warn-only)
+        if: matrix.os == 'ubuntu-latest'
         continue-on-error: true
         env:
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
@@ -120,8 +130,12 @@ jobs:
           -o test-results/m1-adversarial.json
 
       # ── Generate rich E2E report from artifacts ────────────────────
+      # Ubuntu-only: the script consumes the medusa adversarial corpus
+      # (cloned only on Ubuntu above) plus the Phase 3 E2E artifacts
+      # the report builds. Windows runs the unit + integration suite
+      # for cross-platform coverage but skips the corpus-driven E2E.
       - name: Generate E2E report
-        if: always()
+        if: always() && matrix.os == 'ubuntu-latest'
         run: python tests/generate_e2e_report.py
 
       # ── Generate step summary from JUnit XML ───────────────────────
@@ -137,6 +151,6 @@ jobs:
         uses: actions/upload-artifact@v4
         if: always()
         with:
-          name: mcp-test-results
+          name: mcp-test-results-${{ matrix.os }}
           path: test-results/
           retention-days: 30
diff --git a/.github/workflows/v0-user-flow-e2e.yml b/.github/workflows/v0-user-flow-e2e.yml
new file mode 100644
index 00000000..28b47492
--- /dev/null
+++ b/.github/workflows/v0-user-flow-e2e.yml
@@ -0,0 +1,219 @@
+name: v0 user flow e2e
+
+# End-to-end validation of BicameralAI/bicameral#108's six canonical user
+# flows via real Claude Code CLI sessions with bicameral-mcp registered.
+# See tests/e2e/README.md for the design.
+#
+# Two-stage workflow:
+#   1. assertions  — always runs (PR + dispatch), no manual gate. Validates
+#                    MCP tool callability + surfaces agentic-layer advisories.
+#   2. recording   — queued on every trigger (PR + dispatch) but held by the
+#                    `recording-approval` environment's required reviewers
+#                    until approved. Produces split-screen demo MP4s;
+#                    expensive (~30-45 min wall + API spend), hence the gate.
+#
+# Note: `pull_request` runs use the workflow file from the PR's merge ref, so
+# this workflow does run on the PR that adds it. `workflow_dispatch` only
+# becomes available once the file exists on the default branch.
+
+on:
+  pull_request:
+    branches: [main, dev]
+    paths:
+      - 'tests/e2e/**'
+      - 'handlers/**'
+      - 'ledger/**'
+      - 'contracts.py'
+      - 'skills/bicameral-**'
+      - 'server.py'
+      - 'pyproject.toml'
+      - '.github/workflows/v0-user-flow-e2e.yml'
+  workflow_dispatch:
+
+env:
+  PYTHON_VERSION: '3.11'
+  NODE_VERSION: '20'
+  # Pinned commit of github.com/desktop/desktop. Bump when the roadmap.md
+  # shape drifts in ways that break prompts, or when bind targets change.
+  DESKTOP_PINNED_COMMIT: 'e6c50fb028171e9cec03594273c8116bb135847e'
+  DESKTOP_REPO_PATH: /tmp/desktop-clone
+
+jobs:
+  # ── Stage 1: assertions — always runs ───────────────────────────────
+  assertions:
+    name: e2e assertions (auto)
+    runs-on: ubuntu-latest
+    # production env provides CLAUDE_CODE_OAUTH_TOKEN. No required reviewers
+    # on this env → PR triggers flow through automatically.
+    environment: production
+    timeout-minutes: 25
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Setup Node.js (for Claude Code CLI)
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+
+      - name: Install bicameral-mcp + test deps
+        run: pip install -e ".[test]"
+
+      - name: Install Claude Code CLI
+        run: npm install -g @anthropic-ai/claude-code
+
+      - name: Verify CLI tooling on PATH
+        run: |
+          which claude && claude --version
+          which bicameral-mcp
+
+      - name: Clone desktop/desktop at pinned commit
+        run: |
+          mkdir -p ${{ env.DESKTOP_REPO_PATH }}
+          cd ${{ env.DESKTOP_REPO_PATH }}
+          git init -q
+          git remote add origin https://github.com/desktop/desktop
+          git fetch --depth 1 origin "${DESKTOP_PINNED_COMMIT}"
+          git checkout FETCH_HEAD
+          git checkout -b main
+          git config user.email ci@bicameral.test
+          git config user.name CI
+          test -f docs/process/roadmap.md
+          test -f app/src/lib/git/cherry-pick.ts
+
+      - name: Claude Code OAuth token visibility probe
+        run: |
+          set +e
+          if [ -n "${CLAUDE_CODE_OAUTH_TOKEN}" ]; then
+            echo "CLAUDE_CODE_OAUTH_TOKEN: present (length=${#CLAUDE_CODE_OAUTH_TOKEN})"
+          else
+            echo "CLAUDE_CODE_OAUTH_TOKEN: EMPTY or UNSET"
+            echo "  secret expression non-empty: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN != '' }}"
+            exit 1
+          fi
+        env:
+          CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+
+      - name: Run v0 user flow e2e (assertion-only, blocking)
+        env:
+          CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+        run: python tests/e2e/run_e2e_flows.py
+
+      - name: Upload e2e transcripts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: v0-user-flow-e2e-transcripts
+          path: test-results/e2e/
+          retention-days: 30
+
+  # ── Stage 2: recording — manual approval required ────────────────────
+  recording:
+    name: split-screen demo recording (manual approval)
+    # No `needs:` — runs in parallel with `assertions`. Advisory failures
+    # in the assertion harness must NOT block recording: the demo is
+    # meant to showcase the agentic gap as well as the wins, and the two
+    # paths have independent value (assertion = MCP-tool callability,
+    # recording = visual validation of the agentic layer).
+    #
+    # The `recording-approval` environment's required-reviewers rule is
+    # the SOLE gate. No `if:` predicate — adding one would skip the job
+    # on PR triggers (or on dispatch without an extra input toggle), so
+    # reviewers would never see the approval prompt. Letting the job
+    # always queue means it sits in "Waiting" until someone with reviewer
+    # permission clicks Approve in the Actions UI.
+    runs-on: ubuntu-latest
+    # `recording-approval` env should have required reviewers configured
+    # in repo settings → that's the manual gate. Inherits OAuth token from
+    # the same env (or repo-level secrets).
+    environment: recording-approval
+    timeout-minutes: 60
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Setup Node.js (for Claude Code CLI)
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+
+      - name: Install bicameral-mcp + test deps
+        run: pip install -e ".[test]"
+
+      - name: Install Claude Code CLI
+        run: npm install -g @anthropic-ai/claude-code
+
+      - name: Verify CLI tooling on PATH
+        run: |
+          which claude && claude --version
+          which bicameral-mcp
+
+      - name: Clone desktop/desktop at pinned commit
+        run: |
+          mkdir -p ${{ env.DESKTOP_REPO_PATH }}
+          cd ${{ env.DESKTOP_REPO_PATH }}
+          git init -q
+          git remote add origin https://github.com/desktop/desktop
+          git fetch --depth 1 origin "${DESKTOP_PINNED_COMMIT}"
+          git checkout FETCH_HEAD
+          git checkout -b main
+          git config user.email ci@bicameral.test
+          git config user.name CI
+          test -f docs/process/roadmap.md
+          test -f app/src/lib/git/cherry-pick.ts
+
+      # NOTE: do NOT install `chromium-browser` here — on Ubuntu 22.04+ the
+      # apt package is a snap-store wrapper that hangs the runner. GitHub's
+      # ubuntu-latest image ships google-chrome-stable pre-installed;
+      # record_demo.sh auto-detects it.
+      - name: Install recording dependencies (Xvfb + ffmpeg + xterm + tmux)
+        run: |
+          sudo apt-get update -qq
+          sudo apt-get install -y --no-install-recommends \
+            xvfb fluxbox xterm ffmpeg tmux fonts-dejavu
+          command -v google-chrome-stable || command -v google-chrome || \
+            command -v chromium || command -v chromium-browser || \
+            { echo "ERROR: no chromium-compatible browser found on PATH" >&2; exit 1; }
+
+      # ANTHROPIC_API_KEY (NOT CLAUDE_CODE_OAUTH_TOKEN) — interactive `claude`
+      # ignores the OAuth env var (verified against 2.1.126; matches GH issue
+      # #32463). The assertions job's `claude -p` path keeps using OAuth.
+      - name: Anthropic API key visibility probe
+        run: |
+          set +e
+          if [ -n "${ANTHROPIC_API_KEY}" ]; then
+            echo "ANTHROPIC_API_KEY: present (length=${#ANTHROPIC_API_KEY})"
+          else
+            echo "ANTHROPIC_API_KEY: EMPTY or UNSET"
+            echo "  secret expression non-empty: ${{ secrets.ANTHROPIC_API_KEY != '' }}"
+            exit 1
+          fi
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+
+      # continue-on-error: a recording flake should not propagate as a hard
+      # failure. The artifact upload below preserves whatever was captured.
+      # Uses the interactive (tmux-driven real claude TUI) path; legacy
+      # `tests/e2e/record_demo.sh` is retained as a fallback.
+      - name: Record demo videos (split-screen, interactive TUI)
+        continue-on-error: true
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+        run: bash tests/e2e/record_demo_interactive.sh
+
+      - name: Upload demo videos
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: v0-user-flow-e2e-demos
+          path: docs/demos/v0-userflow-e2e/*.mp4
+          retention-days: 90
+          if-no-files-found: warn
diff --git a/.gitignore b/.gitignore
index c32c25b9..fea06007 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,11 @@ test-results/
 # Bicameral MCP local data (history stored in parent repo)
 .bicameral/
 
+# Demo MP4s — generated by the optional `record_demo` workflow path.
+# Path-tracked under docs/demos/v0-userflow-e2e/ but binaries are
+# distributed via the GitHub Actions artifact, not git.
+docs/demos/**/*.mp4
+
 # QOR governance (process-only — not part of the published artifact)
 .agent/
 .failsafe/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f0486be6..68c0bf6b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,27 @@
 All notable changes to bicameral-mcp are tracked here. Format loosely follows
 [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
+## [Unreleased]
+
+### Added
+
+- `handlers/preflight.py` — `_region_anchored_preflight` now expands caller-supplied `file_paths` by 1 hop along the code-locator graph's **import edges** before the `binds_to` lookup. Lifts the strict exact-match recall ceiling so a decision bound to `app/src/lib/git/reorder.ts` surfaces when the caller passes the structurally-near `app/src/ui/multi-commit-operation/reorder.tsx`. Decisions reached only via expansion carry `confidence=0.7` (vs `0.9` for direct pins). `sources_chained` includes `"graph"` (alongside `"region"`) when expansion contributed at least one hit. Bounded per #64: ≤10 input seeds × `max_neighbors_per_result` neighbors per seed. Closes #173 (and supersedes #64).
+- `adapters/code_locator.py::RealCodeLocatorAdapter.expand_file_paths_via_graph` — public method backing the expansion. Filters to ``imports`` edges only (file-level structural dependency); ``invokes`` / ``inherits`` / ``contains`` are symbol-level edges that over-broaden the file-level expansion. Returns `(expanded, added)` so callers can mark provenance.
+- `skills/bicameral-preflight/SKILL.md` Step 2 — documents the imports-only expansion + caller-side `confidence` and `sources_chained` semantics.
+- `tests/eval/preflight_dataset.jsonl` — M6 row flipped from XFAIL → live. Setup updated to specify graph-neighbor topology (`graph_neighbors`) and pinned-decision targets (`region_decisions_pinned_to`); the asserter now tests true graph-expansion semantics rather than mock-returns-decision-regardless-of-input.
+- `tests/eval/run_preflight_eval.py` — `_apply_setup` extended with `region_decisions_pinned_to` (path-aware decision lookup) and `graph_neighbors` (stub code_graph) so M6-style scenarios can be expressed in the dataset.
+
+### Changed
+
+- `skills/bicameral-preflight/SKILL.md` Step 5.6 — judgment for contradiction-capture moves from the agent to the user via `AskUserQuestion` (Step 5.6.1). The agent no longer infers whether the prompt contradicts a surfaced decision; it asks the user (`supersede` / `keep_both` / `unrelated`) and acts mechanically on the answer (Step 5.6.2 — ingest + resolve_collision). The PostToolUse hook reminder now templates the disambiguation question rather than the bare ingest+resolve_collision sequence. Closes #175.
+- `tests/e2e/run_e2e_flows.py::assert_flow_2a` — pass criterion changed from "ingest+resolve_collision fired" to "`AskUserQuestion` invoked with disambiguation shape after preflight surfaced ≥1 decision." The user-side response can't be driven in headless `claude -p`, so the testable signal is the question invocation. The mechanical capture (Step 5.6.2) only fires after a human answers and is exercised in interactive Claude Code sessions, not CI.
+
+### Fixed
+
+### Schema
+
+### Security
+
 ## v0.13.6 — Triage: dashboard tooltip + capture-corrections source fix + #108 sim — built via [QorLogic SDLC](https://github.com/MythologIQ-Labs-LLC/qor-logic)
 
 Triage release per [DEV_CYCLE.md §10.5](DEV_CYCLE.md). Forwards three
diff --git a/adapters/code_locator.py b/adapters/code_locator.py
index ceb88624..64011d4b 100644
--- a/adapters/code_locator.py
+++ b/adapters/code_locator.py
@@ -63,6 +63,7 @@ def _ensure_initialized(self) -> None:
             )
 
         self._db = db
+        self._config = config
         self._validate_tool = ValidateSymbolsTool(db, config)
         self._neighbors_tool = GetNeighborsTool(db, config)
         self._initialized = True
@@ -90,6 +91,180 @@ def get_neighbors(self, symbol_id: int) -> list[dict]:
         results = self._neighbors_tool.execute({"symbol_id": symbol_id})
         return [r.model_dump() for r in results]
 
+    # Hard cap on the number of caller-supplied seeds we expand. Mirrors the
+    # contract documented in #64: ≤10 input seeds × ≤max_neighbors_per_result
+    # neighbors per seed, so the worst-case response is still bounded even
+    # when the caller passes a large file_paths list. Tightens the cost
+    # envelope relative to relying on the config cap alone. Revisit the
+    # value (see #64) if telemetry shows we're losing recall.
+    _MAX_EXPANSION_SEEDS = 10
+
+    def expand_file_paths_via_graph(
+        self,
+        file_paths: list[str],
+        hops: int = 1,
+    ) -> tuple[list[str], list[str]]:
+        """Expand caller-supplied file paths with their *import* graph
+        neighbors.
+
+        For each input file, look up its indexed symbols, fetch each
+        symbol's ``hops``-hop ego graph (1 hop by default), keep **import
+        edges only**, and collect the file paths those neighbor symbols
+        live in. The expanded set is the union of inputs and neighbor
+        files.
+
+        **Why imports only** (per #64): import is a *file-level* structural
+        dependency edge ("module A's contract is referenced by module B"),
+        which matches the granularity of the region-anchored decision
+        lookup. ``invokes`` / ``inherits`` / ``contains`` are *symbol-level*
+        edges that over-fire for the recall contract this method backs. If
+        telemetry surfaces real-world contradictions that imports-only
+        misses, widen the filter then — not preemptively.
+
+        Args:
+            file_paths: Paths supplied by the caller; falsy entries and
+                duplicates are dropped when expansion runs.
+            hops: Radius passed to ``get_ego_graph``; values below 1
+                disable expansion.
+
+        Returns:
+            ``(expanded, added)``: ``expanded`` is the deduped inputs in
+            caller order followed by newly discovered neighbor files in
+            discovery order; ``added`` holds only the paths NOT in the
+            original input, so the caller can mark expanded matches with
+            lower confidence than direct pins.
+
+        Bounds (mirrors #64's spec):
+          - At most ``_MAX_EXPANSION_SEEDS`` (=10) input seeds are walked.
+          - Per seed, at most ``max_neighbors_per_result`` symbols are
+            walked; per symbol, at most that many *import* neighbors are
+            followed.
+          - ``added`` never exceeds ``max_neighbors_per_result`` ×
+            ``_MAX_EXPANSION_SEEDS`` entries, even for hub seeds.
+
+        Best-effort: if the symbol index or config is unavailable the input
+        is returned as-is with an empty ``added``; a seed or symbol whose
+        lookup fails or whose rows are malformed is skipped.
+
+        Used by ``handlers/preflight.py::_region_anchored_preflight`` to
+        lift the strict ``WHERE file_path IN $fps`` recall ceiling. See
+        issue #173 (and the superseded #64 for the design rationale).
+        """
+        if not file_paths or hops < 1:
+            return list(file_paths), []
+        try:
+            self._ensure_initialized()
+            per_symbol_cap = self._config.max_neighbors_per_result
+            # Cap on NEW paths: ≤10 seeds × ≤per_symbol_cap neighbors each.
+            global_cap = per_symbol_cap * self._MAX_EXPANSION_SEEDS
+        except Exception:
+            return list(file_paths), []
+
+        # Only the first few inputs seed the graph walk; the caller can still
+        # pass every path to the underlying ledger lookup.
+        seeds = [fp for fp in file_paths if fp][: self._MAX_EXPANSION_SEEDS]
+
+        # Deduped inputs in caller order; `known` tracks every path already emitted.
+        expanded = list(dict.fromkeys(fp for fp in file_paths if fp))
+        known = set(expanded)
+        added_paths: list[str] = []
+
+        for fp in seeds:
+            if len(added_paths) >= global_cap:
+                break
+            try:
+                symbols = list(self._db.lookup_by_file(fp) or [])[:per_symbol_cap]
+            except Exception:
+                continue
+            for sym in symbols:
+                if len(added_paths) >= global_cap:
+                    break
+                try:
+                    neighbors = self._db.get_ego_graph(sym["id"], hops=hops) or []
+                    # Filter to import edges BEFORE capping so other edge types
+                    # cannot crowd import neighbors out of the window.
+                    imported = [
+                        (n.get("file_path") or "").strip()
+                        for n in neighbors
+                        if n.get("edge_type") == "imports"
+                    ][:per_symbol_cap]
+                except Exception:
+                    # Malformed symbol/neighbor rows: skip this symbol only.
+                    continue
+                for nfp in imported:
+                    if len(added_paths) >= global_cap:
+                        break
+                    if nfp and nfp not in known:
+                        known.add(nfp)
+                        added_paths.append(nfp)
+
+        return expanded + added_paths, added_paths
+
+    def neighbors_for(
+        self,
+        file_path: str,
+        start_line: int,
+        end_line: int,
+    ) -> tuple[str, ...]:
+        """List the addresses of a code span's 1-hop graph neighbors.
+
+        Phase 3 (#60) protocol: the tightest indexed symbol enclosing
+        ``(file_path, start_line, end_line)`` is resolved and its neighbors
+        come back as a sorted tuple of ``"<file>::<symbol_name>"`` strings.
+        An unresolved span or a failed lookup yields ``()``.
+        """
+        self._ensure_initialized()
+        try:
+            target = self._resolve_symbol_id_for_span(file_path, start_line, end_line)
+            if target is None:
+                return ()
+            found = self._neighbors_tool.execute({"symbol_id": target})
+        except Exception:
+            return ()
+        labels = []
+        for item in found:
+            where = getattr(item, "file_path", "")
+            what = getattr(item, "symbol_name", "") or getattr(item, "name", "")
+            labels.append(f"{where}::{what}")
+        return tuple(sorted(labels))
+
+    def _resolve_symbol_id_for_span(
+        self,
+        file_path: str,
+        start_line: int,
+        end_line: int,
+    ) -> int | None:
+        """Find the id of the narrowest indexed symbol enclosing a line range.
+
+        Rows for ``file_path`` come from the shared ``self._db`` handle via
+        ``lookup_by_file``. A row qualifies when its ``start_line`` /
+        ``end_line`` fully cover ``[start_line, end_line]``; among
+        qualifying rows the one with the smallest line span wins, and the
+        earliest row wins ties. ``None`` means no symbol covers the range;
+        the caller treats that as "no neighbors known".
+
+        Why ``self._db`` is reused (PR #73 review history):
+        - An earlier draft opened a fresh ``SymbolDB(...)`` per call and
+          leaked SQLite handles (CodeRabbit MAJOR
+          adapters/code_locator.py:136).
+        - That draft also read ``config.sqlite_db_path``, which does not
+          exist on ``CodeLocatorConfig`` (the real attribute is
+          ``sqlite_db``). ``neighbors_for``'s broad ``except`` swallowed the
+          ``AttributeError``, so the method always returned ``()`` and the
+          continuity Jaccard signal was permanently zero in production
+          (Devin CRITICAL).
+        """
+        winner: int | None = None
+        narrowest = 1 << 30
+        for row in self._db.lookup_by_file(file_path):
+            top, bottom = row["start_line"], row["end_line"]
+            if top > start_line or bottom < end_line:
+                continue  # span not fully enclosed by this symbol
+            width = bottom - top
+            if width < narrowest:
+                narrowest, winner = width, row["id"]
+        return winner
+
     async def extract_symbols(self, file_path: str) -> list[dict]:
         """Extract symbols from a file via tree-sitter (no LLM)."""
         from code_locator.indexing.symbol_extractor import extract_symbols
@@ -102,12 +277,14 @@ async def extract_symbols(self, file_path: str) -> list[dict]:
             sym_type = rec.type
             if sym_type not in ("function", "class", "module", "file"):
                 sym_type = "function"
-            symbols.append({
-                "name": rec.qualified_name or rec.name,
-                "type": sym_type,
-                "start_line": rec.start_line,
-                "end_line": rec.end_line,
-            })
+            symbols.append(
+                {
+                    "name": rec.qualified_name or rec.name,
+                    "type": sym_type,
+                    "start_line": rec.start_line,
+                    "end_line": rec.end_line,
+                }
+            )
         return symbols
 
     def resolve_symbols(self, payload: dict) -> dict:
@@ -117,10 +294,7 @@ def resolve_symbols(self, payload: dict) -> dict:
         if not mappings:
             return payload
 
-        needs_resolution = any(
-            m.get("symbols") and not m.get("code_regions")
-            for m in mappings
-        )
+        needs_resolution = any(m.get("symbols") and not m.get("code_regions") for m in mappings)
         if not needs_resolution:
             return payload
 
@@ -141,21 +315,27 @@ def resolve_symbols(self, payload: dict) -> dict:
                     try:
                         rows = db.lookup_by_name(name)
                     except Exception as exc:
-                        logger.warning("[resolve_symbols] lookup_by_name failed for '%s': %s", name, exc)
+                        logger.warning(
+                            "[resolve_symbols] lookup_by_name failed for '%s': %s", name, exc
+                        )
                         rows = []
                     for row in rows:
-                        code_regions.append({
-                            "symbol": row["qualified_name"] or row["name"],
-                            "file_path": row["file_path"],
-                            "start_line": row["start_line"],
-                            "end_line": row["end_line"],
-                            "type": row["type"],
-                            "purpose": mapping.get("intent", ""),
-                        })
+                        code_regions.append(
+                            {
+                                "symbol": row["qualified_name"] or row["name"],
+                                "file_path": row["file_path"],
+                                "start_line": row["start_line"],
+                                "end_line": row["end_line"],
+                                "type": row["type"],
+                                "purpose": mapping.get("intent", ""),
+                            }
+                        )
                 if code_regions:
                     mapping = {**mapping, "code_regions": code_regions}
                 else:
-                    logger.debug("[resolve_symbols] no symbols found in index for: %s", symbol_names)
+                    logger.debug(
+                        "[resolve_symbols] no symbols found in index for: %s", symbol_names
+                    )
 
             resolved_mappings.append(mapping)
 
diff --git a/adapters/ledger.py b/adapters/ledger.py
index 3516d7c9..71341c5b 100644
--- a/adapters/ledger.py
+++ b/adapters/ledger.py
@@ -34,6 +34,7 @@ def _read_collaboration_mode(repo_path: str) -> str:
         return "solo"
     try:
         import yaml
+
         config = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
         return config.get("mode", "solo")
     except Exception:
@@ -66,9 +67,9 @@ def get_ledger():
         mode = _read_collaboration_mode(repo_path)
 
         if mode == "team":
-            from events.writer import EventFileWriter, _get_git_email
             from events.materializer import EventMaterializer
             from events.team_adapter import TeamWriteAdapter
+            from events.writer import EventFileWriter, _get_git_email
 
             # BICAMERAL_DATA_PATH redirects all history (events + local state)
             # to a separate directory — typically a private parent repo when
@@ -103,4 +104,5 @@ def get_drift_analyzer():
     or CodeGenomeDriftAnalyzer when ready.
     """
     from ledger.drift import HashDriftAnalyzer
+
     return HashDriftAnalyzer()
diff --git a/cli/__init__.py b/cli/__init__.py
new file mode 100644
index 00000000..6a6f550b
--- /dev/null
+++ b/cli/__init__.py
@@ -0,0 +1 @@
+"""Bicameral-MCP CLI utilities."""
diff --git a/code_locator/indexing/cocoindex_pipeline.py b/code_locator/indexing/cocoindex_pipeline.py
index bed6b4f3..67d170f2 100644
--- a/code_locator/indexing/cocoindex_pipeline.py
+++ b/code_locator/indexing/cocoindex_pipeline.py
@@ -129,9 +129,7 @@ def extract_file_symbols(filename: str, content: str) -> list[dict]:
     def text_to_embedding(
         text: cocoindex.DataSlice[str],
     ) -> cocoindex.DataSlice[list[float]]:
-        return text.transform(
-            cocoindex.functions.SentenceTransformerEmbed(model=embedding_model)
-        )
+        return text.transform(cocoindex.functions.SentenceTransformerEmbed(model=embedding_model))
 
     @cocoindex.flow_def(name="CodeLocatorIndex")
     def code_locator_flow(
@@ -175,9 +173,7 @@ def code_locator_flow(
                 )
 
             # Path 2: Symbol extraction
-            file["symbols"] = file["content"].transform(
-                extract_file_symbols, file["filename"]
-            )
+            file["symbols"] = file["content"].transform(extract_file_symbols, file["filename"])
 
             with file["symbols"].row() as sym:
                 symbol_collector.collect(
@@ -292,8 +288,10 @@ def _count_cocoindex_table(table_name: str) -> int:
     Falls back to 0 if the table doesn't exist or connection fails.
     """
     import os
+
     try:
         import psycopg2
+
         url = os.environ.get("COCOINDEX_DATABASE_URL", "")
         if not url:
             return 0
diff --git a/code_locator/indexing/graph_builder.py b/code_locator/indexing/graph_builder.py
index 32e5bd1f..6945e32d 100644
--- a/code_locator/indexing/graph_builder.py
+++ b/code_locator/indexing/graph_builder.py
@@ -8,19 +8,17 @@
 
 import os
 from pathlib import Path
-from typing import Dict, List, Set, Tuple
 
 from .sqlite_store import SymbolDB
 from .symbol_extractor import (
     EXTENSION_LANGUAGE,
-    SKIP_DIRS,
     _get_parser,
     _node_text,
 )
 
-
 # ── Contains edges ───────────────────────────────────────────────────
 
+
 def _build_contains_edges(db: SymbolDB) -> list[tuple[int, int, str]]:
     """Build parent->child edges using parent_qualified_name."""
     conn = db._connect()
@@ -50,6 +48,7 @@ def _build_contains_edges(db: SymbolDB) -> list[tuple[int, int, str]]:
 
 # ── Import edges ─────────────────────────────────────────────────────
 
+
 def _extract_python_imports(tree, code: bytes) -> list[str]:
     """Extract imported names from Python import statements."""
     names: list[str] = []
@@ -73,7 +72,11 @@ def walk(node):
         if node.type == "import_from_statement":
             # from foo import bar, baz
             for child in node.children:
-                if child.type == "dotted_name" and child.prev_sibling and _node_text(code, child.prev_sibling) == "import":
+                if (
+                    child.type == "dotted_name"
+                    and child.prev_sibling
+                    and _node_text(code, child.prev_sibling) == "import"
+                ):
                     names.append(_node_text(code, child))
                 elif child.type == "aliased_import":
                     alias = child.child_by_field_name("alias")
@@ -198,6 +201,7 @@ def _extract_imports_for_language(language_id: str, tree, code: bytes) -> list[s
 
 # ── Invokes edges ────────────────────────────────────────────────────
 
+
 def _extract_call_names(tree, code: bytes, language_id: str) -> list[tuple[int, str]]:
     """Extract (line_number, called_function_name) from call expressions.
 
@@ -230,6 +234,7 @@ def walk(node):
 
 # ── Main builder ─────────────────────────────────────────────────────
 
+
 def build_graph(db: SymbolDB, repo_path: str) -> int:
     """Build dependency edges for all indexed symbols. Returns edge count."""
     # Clear old edges — full rebuild is fast relative to symbol extraction
@@ -250,7 +255,7 @@ def build_graph(db: SymbolDB, repo_path: str) -> int:
     ).fetchall()
 
     # Map: name -> list of symbol ids (multiple symbols can have the same name)
-    name_to_ids: Dict[str, list[int]] = {}
+    name_to_ids: dict[str, list[int]] = {}
     for sym in all_symbols:
         name = sym[1]
         if name not in name_to_ids:
@@ -274,7 +279,7 @@ def build_graph(db: SymbolDB, repo_path: str) -> int:
             continue
 
         try:
-            with open(abs_path, "r", encoding="utf-8", errors="replace") as f:
+            with open(abs_path, encoding="utf-8", errors="replace") as f:
                 source = f.read()
         except OSError:
             continue
@@ -301,7 +306,7 @@ def build_graph(db: SymbolDB, repo_path: str) -> int:
         for row in file_all_symbols:
             all_file_sym_ids.add(row[0])
 
-        seen_import_edges: Set[Tuple[int, int]] = set()
+        seen_import_edges: set[tuple[int, int]] = set()
         for imp_name in imported_names:
             target_ids = name_to_ids.get(imp_name, [])
             for target_id in target_ids:
@@ -324,7 +329,7 @@ def build_graph(db: SymbolDB, repo_path: str) -> int:
             (rel_path,),
         ).fetchall()
 
-        seen_invoke_edges: Set[Tuple[int, int]] = set()
+        seen_invoke_edges: set[tuple[int, int]] = set()
         for func in func_symbols:
             func_id = func[0]
             func_start = func[2]
diff --git a/code_locator/indexing/index_builder.py b/code_locator/indexing/index_builder.py
index bf66f885..cf1e1d1c 100644
--- a/code_locator/indexing/index_builder.py
+++ b/code_locator/indexing/index_builder.py
@@ -93,6 +93,7 @@ def build_index(repo_path: str, db_path: str) -> IndexStats:
 
     # Build dependency graph edges
     from .graph_builder import build_graph
+
     stats.edges_created = build_graph(db, repo_path)
 
     db.close()
diff --git a/code_locator/indexing/sqlite_store.py b/code_locator/indexing/sqlite_store.py
index 0f744fd9..a1a7e649 100644
--- a/code_locator/indexing/sqlite_store.py
+++ b/code_locator/indexing/sqlite_store.py
@@ -9,7 +9,6 @@
 import sqlite3
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any
 
 
 @dataclass
@@ -96,8 +95,16 @@ def insert_symbols_batch(self, symbols: list[SymbolRecord]) -> None:
                (name, qualified_name, type, file_path, start_line, end_line, signature, parent_qualified_name)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
             [
-                (s.name, s.qualified_name, s.type, s.file_path,
-                 s.start_line, s.end_line, s.signature, s.parent_qualified_name)
+                (
+                    s.name,
+                    s.qualified_name,
+                    s.type,
+                    s.file_path,
+                    s.start_line,
+                    s.end_line,
+                    s.signature,
+                    s.parent_qualified_name,
+                )
                 for s in symbols
             ],
         )
@@ -110,21 +117,15 @@ def delete_file_symbols(self, file_path: str) -> None:
 
     def lookup_by_name(self, name: str) -> list[sqlite3.Row]:
         conn = self._connect()
-        return conn.execute(
-            "SELECT * FROM symbols WHERE name = ?", (name,)
-        ).fetchall()
+        return conn.execute("SELECT * FROM symbols WHERE name = ?", (name,)).fetchall()
 
     def lookup_by_file(self, file_path: str) -> list[sqlite3.Row]:
         conn = self._connect()
-        return conn.execute(
-            "SELECT * FROM symbols WHERE file_path = ?", (file_path,)
-        ).fetchall()
+        return conn.execute("SELECT * FROM symbols WHERE file_path = ?", (file_path,)).fetchall()
 
     def get_all_symbol_names(self) -> list[tuple[int, str, str]]:
         conn = self._connect()
-        rows = conn.execute(
-            "SELECT id, name, qualified_name FROM symbols"
-        ).fetchall()
+        rows = conn.execute("SELECT id, name, qualified_name FROM symbols").fetchall()
         return [(r[0], r[1], r[2]) for r in rows]
 
     def symbol_count(self) -> int:
@@ -133,9 +134,7 @@ def symbol_count(self) -> int:
 
     def lookup_by_id(self, symbol_id: int) -> sqlite3.Row | None:
         conn = self._connect()
-        return conn.execute(
-            "SELECT * FROM symbols WHERE id = ?", (symbol_id,)
-        ).fetchone()
+        return conn.execute("SELECT * FROM symbols WHERE id = ?", (symbol_id,)).fetchone()
 
     def delete_all_edges(self) -> None:
         conn = self._connect()
diff --git a/code_locator/indexing/symbol_extractor.py b/code_locator/indexing/symbol_extractor.py
index 6b74deb5..51f246a1 100644
--- a/code_locator/indexing/symbol_extractor.py
+++ b/code_locator/indexing/symbol_extractor.py
@@ -6,8 +6,6 @@
 
 from __future__ import annotations
 
-from typing import Dict, List, Optional
-
 from .sqlite_store import SymbolRecord
 
 # ── Language mappings ────────────────────────────────────────────────
@@ -39,14 +37,16 @@
 _USE_LEGACY = False
 
 try:
-    from tree_sitter_languages import get_language as _legacy_get_language, get_parser as _legacy_get_parser
+    from tree_sitter_languages import get_language as _legacy_get_language
+    from tree_sitter_languages import get_parser as _legacy_get_parser
+
     _USE_LEGACY = True
 except Exception:
     _legacy_get_language = None
     _legacy_get_parser = None
 
 # Individual language packages for the modern API
-_LANG_MODULES: Dict[str, object] = {}
+_LANG_MODULES: dict[str, object] = {}
 
 if not _USE_LEGACY:
     try:
@@ -66,8 +66,8 @@
 
 # ── Parser caching ───────────────────────────────────────────────────
 
-PARSER_CACHE: Dict[str, object] = {}
-LANGUAGE_CACHE: Dict[str, object] = {}
+PARSER_CACHE: dict[str, object] = {}
+LANGUAGE_CACHE: dict[str, object] = {}
 
 
 def _get_language_obj(resolved: str):
@@ -84,6 +84,7 @@ def _get_language_obj(resolved: str):
 
     if pkg_name not in _LANG_MODULES:
         import importlib
+
         mod = importlib.import_module(pkg_name)
         _LANG_MODULES[pkg_name] = mod
 
@@ -109,11 +110,12 @@ def _get_parser(language_id: str):
 
 # ── Helpers ──────────────────────────────────────────────────────────
 
+
 def _node_text(code: bytes, node) -> str:
-    return code[node.start_byte:node.end_byte].decode("utf-8", errors="replace")
+    return code[node.start_byte : node.end_byte].decode("utf-8", errors="replace")
 
 
-def _get_name_from_node(node, code: bytes) -> Optional[str]:
+def _get_name_from_node(node, code: bytes) -> str | None:
     name_node = node.child_by_field_name("name")
     if name_node is None:
         return None
@@ -148,10 +150,11 @@ def _make_record(
 
 # ── Python ───────────────────────────────────────────────────────────
 
-def _extract_python_defs(tree, code: bytes, rel_path: str) -> List[SymbolRecord]:
-    records: List[SymbolRecord] = []
 
-    def walk(node, class_stack: List[str]):
+def _extract_python_defs(tree, code: bytes, rel_path: str) -> list[SymbolRecord]:
+    records: list[SymbolRecord] = []
+
+    def walk(node, class_stack: list[str]):
         if node.type == "class_definition":
             name = _get_name_from_node(node, code)
             if not name:
@@ -187,14 +190,15 @@ def walk(node, class_stack: List[str]):
 
 # ── JavaScript / TypeScript / JSX / TSX ──────────────────────────────
 
-def _extract_js_ts_defs(tree, code: bytes, rel_path: str, language_id: str) -> List[SymbolRecord]:
-    records: List[SymbolRecord] = []
+
+def _extract_js_ts_defs(tree, code: bytes, rel_path: str, language_id: str) -> list[SymbolRecord]:
+    records: list[SymbolRecord] = []
 
     class_types = {"class_declaration"}
     if language_id in ("typescript", "tsx"):
         class_types.update({"interface_declaration", "type_alias_declaration", "enum_declaration"})
 
-    def walk(node, class_stack: List[str]):
+    def walk(node, class_stack: list[str]):
         if node.type in class_types:
             name = _get_name_from_node(node, code)
             if not name:
@@ -250,11 +254,12 @@ def walk(node, class_stack: List[str]):
 
 # ── Java ─────────────────────────────────────────────────────────────
 
-def _extract_java_defs(tree, code: bytes, rel_path: str) -> List[SymbolRecord]:
-    records: List[SymbolRecord] = []
+
+def _extract_java_defs(tree, code: bytes, rel_path: str) -> list[SymbolRecord]:
+    records: list[SymbolRecord] = []
     class_types = {"class_declaration", "interface_declaration", "enum_declaration"}
 
-    def walk(node, class_stack: List[str]):
+    def walk(node, class_stack: list[str]):
         if node.type in class_types:
             name = _get_name_from_node(node, code)
             if not name:
@@ -288,10 +293,11 @@ def walk(node, class_stack: List[str]):
 
 # ── Go ───────────────────────────────────────────────────────────────
 
-def _extract_go_defs(tree, code: bytes, rel_path: str) -> List[SymbolRecord]:
-    records: List[SymbolRecord] = []
 
-    def walk(node, class_stack: List[str]):
+def _extract_go_defs(tree, code: bytes, rel_path: str) -> list[SymbolRecord]:
+    records: list[SymbolRecord] = []
+
+    def walk(node, class_stack: list[str]):
         if node.type == "type_spec":
             type_node = node.child_by_field_name("type")
             if type_node is not None and type_node.type in ("struct_type", "interface_type"):
@@ -326,11 +332,12 @@ def walk(node, class_stack: List[str]):
 
 # ── Rust ─────────────────────────────────────────────────────────────
 
-def _extract_rust_defs(tree, code: bytes, rel_path: str) -> List[SymbolRecord]:
-    records: List[SymbolRecord] = []
+
+def _extract_rust_defs(tree, code: bytes, rel_path: str) -> list[SymbolRecord]:
+    records: list[SymbolRecord] = []
     class_types = {"struct_item", "enum_item", "trait_item"}
 
-    def walk(node, class_stack: List[str]):
+    def walk(node, class_stack: list[str]):
         if node.type in class_types:
             name = _get_name_from_node(node, code)
             if not name:
@@ -356,11 +363,17 @@ def walk(node, class_stack: List[str]):
 
 # ── C# ───────────────────────────────────────────────────────────────
 
-def _extract_csharp_defs(tree, code: bytes, rel_path: str) -> List[SymbolRecord]:
-    records: List[SymbolRecord] = []
-    class_types = {"class_declaration", "interface_declaration", "struct_declaration", "enum_declaration"}
 
-    def walk(node, class_stack: List[str]):
+def _extract_csharp_defs(tree, code: bytes, rel_path: str) -> list[SymbolRecord]:
+    records: list[SymbolRecord] = []
+    class_types = {
+        "class_declaration",
+        "interface_declaration",
+        "struct_declaration",
+        "enum_declaration",
+    }
+
+    def walk(node, class_stack: list[str]):
         if node.type in class_types:
             name = _get_name_from_node(node, code)
             if not name:
@@ -394,7 +407,8 @@ def walk(node, class_stack: List[str]):
 
 # ── Dispatch ─────────────────────────────────────────────────────────
 
-def _extract_definitions(language_id: str, tree, code: bytes, rel_path: str) -> List[SymbolRecord]:
+
+def _extract_definitions(language_id: str, tree, code: bytes, rel_path: str) -> list[SymbolRecord]:
     if language_id == "python":
         return _extract_python_defs(tree, code, rel_path)
     if language_id in ("javascript", "jsx", "typescript", "tsx"):
@@ -412,6 +426,7 @@ def _extract_definitions(language_id: str, tree, code: bytes, rel_path: str) ->
 
 # ── Public API ───────────────────────────────────────────────────────
 
+
 def extract_symbols_from_content(
     content: str, language_id: str, rel_path: str
 ) -> list[SymbolRecord]:
@@ -453,7 +468,7 @@ def extract_symbols(file_path: str, repo_root: str) -> list[SymbolRecord]:
 
     rel_path = Path(file_path).relative_to(repo_root).as_posix()
 
-    with open(file_path, "r", encoding="utf-8", errors="replace") as f:
+    with open(file_path, encoding="utf-8", errors="replace") as f:
         source = f.read()
 
     return extract_symbols_from_content(source, language_id, rel_path)
diff --git a/code_locator/models.py b/code_locator/models.py
index a06de85c..2a4d8a27 100644
--- a/code_locator/models.py
+++ b/code_locator/models.py
@@ -9,7 +9,6 @@
 
 from pydantic import BaseModel, Field
 
-
 # ── Input (from Agent A: Transcript Extractor) ──────────────────────
 
 
@@ -44,12 +43,8 @@ class ValidatedSymbol(BaseModel):
 
     original_candidate: str = Field(description="What the LLM (or keyword extractor) proposed")
     matched_symbol: str = Field(description="The real symbol from the index that matched")
-    match_score: float = Field(
-        ge=0.0, le=100.0, description="rapidfuzz match score (0-100)"
-    )
-    symbol_id: int | None = Field(
-        default=None, description="SQLite row ID of the matched symbol"
-    )
+    match_score: float = Field(ge=0.0, le=100.0, description="rapidfuzz match score (0-100)")
+    symbol_id: int | None = Field(default=None, description="SQLite row ID of the matched symbol")
     repo: str = Field(default="", description="Source repo for multi-repo support")
     bridge_method: str = Field(
         default="rapidfuzz_validate",
@@ -96,9 +91,7 @@ class Provenance(BaseModel):
     bridge_match_score: float = Field(
         default=0.0, description="rapidfuzz score of the bridge match"
     )
-    bridge_method: str = Field(
-        default="", description="How the bridge candidate was generated"
-    )
+    bridge_method: str = Field(default="", description="How the bridge candidate was generated")
     rrf_score: float = Field(default=0.0, description="Weighted RRF fusion score")
 
 
@@ -112,7 +105,9 @@ class NeighborInfo(BaseModel):
     file_path: str = Field(description="Path relative to repo root")
     line_number: int = Field(default=0)
     edge_type: str = Field(description="Relationship: contains, imports, invokes, inherits")
-    direction: str = Field(description="forward (this calls neighbor) or backward (neighbor calls this)")
+    direction: str = Field(
+        description="forward (this calls neighbor) or backward (neighbor calls this)"
+    )
 
 
 # ── Output (to Agent C: Evidence Gater) ──────────────────────────────
@@ -134,5 +129,3 @@ class FoundComponent(BaseModel):
     neighbors: list[NeighborInfo] = Field(
         default_factory=list, description="1-hop structural neighbors"
     )
-
-
diff --git a/code_locator/tools/validate_symbols.py b/code_locator/tools/validate_symbols.py
index c0d02707..7b1a68cb 100644
--- a/code_locator/tools/validate_symbols.py
+++ b/code_locator/tools/validate_symbols.py
@@ -2,10 +2,11 @@
 
 from __future__ import annotations
 
+from rapidfuzz import fuzz
+
 from ..config import CodeLocatorConfig
 from ..indexing.sqlite_store import SymbolDB
 from ..models import ValidatedSymbol
-from rapidfuzz import fuzz
 
 # JSON Schema for tool parameter validation
 TOOL_SCHEMA = {
diff --git a/code_locator_runtime.py b/code_locator_runtime.py
index 4dc43c57..733e2888 100644
--- a/code_locator_runtime.py
+++ b/code_locator_runtime.py
@@ -48,8 +48,6 @@ def ensure_runtime_env() -> None:
     os.environ.setdefault("CODE_LOCATOR_SQLITE_DB", str(cache_root / "code-graph.db"))
 
 
-
-
 def _git_stdout(repo_path: str, *args: str) -> str:
     try:
         result = subprocess.run(
diff --git a/codegenome/adapter.py b/codegenome/adapter.py
index 306192e2..9850ddf5 100644
--- a/codegenome/adapter.py
+++ b/codegenome/adapter.py
@@ -13,12 +13,23 @@
 from typing import Any, Literal
 
 EvidenceType = Literal[
-    "code", "test", "diff", "runtime", "doc", "decision", "agent_eval", "manual",
+    "code",
+    "test",
+    "diff",
+    "runtime",
+    "doc",
+    "decision",
+    "agent_eval",
+    "manual",
 ]
 
 DriftStatus = Literal[
-    "reflected", "drifted", "pending", "ungrounded",
-    "semantically_preserved", "needs_review",
+    "reflected",
+    "drifted",
+    "pending",
+    "ungrounded",
+    "semantically_preserved",
+    "needs_review",
 ]
 
 
diff --git a/codegenome/bind_service.py b/codegenome/bind_service.py
index 0e8ea5d3..bfed2595 100644
--- a/codegenome/bind_service.py
+++ b/codegenome/bind_service.py
@@ -40,14 +40,23 @@ def _check_hash_parity(
     logger.warning(
         "[codegenome] identity content_hash %s != region content_hash %s "
         "(decision_id=%s, %s:%d-%d) — writing identity anyway",
-        identity.content_hash, code_region_content_hash,
-        decision_id, file_path, start_line, end_line,
+        identity.content_hash,
+        code_region_content_hash,
+        decision_id,
+        file_path,
+        start_line,
+        end_line,
     )
 
 
 async def _persist_subject_and_identity(
-    *, ledger, identity: SubjectIdentity,
-    kind: str, canonical_name: str, decision_id: str, repo_ref: str,
+    *,
+    ledger,
+    identity: SubjectIdentity,
+    kind: str,
+    canonical_name: str,
+    decision_id: str,
+    repo_ref: str,
 ) -> bool:
     """Run the four ledger writes; return ``True`` on full success.
 
@@ -57,13 +66,16 @@ async def _persist_subject_and_identity(
     that as identity-not-written.
     """
     subject_id = await ledger.upsert_code_subject(
-        kind=kind, canonical_name=canonical_name,
-        current_confidence=identity.confidence, repo_ref=repo_ref,
+        kind=kind,
+        canonical_name=canonical_name,
+        current_confidence=identity.confidence,
+        repo_ref=repo_ref,
     )
     if not subject_id:
         logger.warning(
             "[codegenome] upsert_code_subject empty id for %s/%s",
-            kind, canonical_name,
+            kind,
+            canonical_name,
         )
         return False
 
@@ -107,8 +119,12 @@ async def write_codegenome_identity(
         repo_ref=repo_ref,
     )
     _check_hash_parity(
-        identity, code_region_content_hash,
-        decision_id, file_path, start_line, end_line,
+        identity,
+        code_region_content_hash,
+        decision_id,
+        file_path,
+        start_line,
+        end_line,
     )
     persisted = await _persist_subject_and_identity(
         ledger=ledger,
diff --git a/codegenome/confidence.py b/codegenome/confidence.py
index 9345de5e..c3a23cbc 100644
--- a/codegenome/confidence.py
+++ b/codegenome/confidence.py
@@ -4,17 +4,16 @@
 
 from collections.abc import Iterable, Mapping
 
-
 # Default weights for the confidence model defined in the architecture
 # plan; referenced by Phase 3+4 callers (continuity, drift classifier).
 # Lives here so future phases import from one place without restructuring.
 DEFAULT_CONFIDENCE_WEIGHTS: dict[str, float] = {
-    "subject_resolution":    0.25,
-    "structural_identity":   0.20,
-    "content_similarity":    0.15,
+    "subject_resolution": 0.25,
+    "structural_identity": 0.20,
+    "content_similarity": 0.15,
     "call_graph_similarity": 0.15,
-    "test_support":          0.15,
-    "runtime_support":       0.10,
+    "test_support": 0.15,
+    "runtime_support": 0.10,
 }
 
 
diff --git a/codegenome/deterministic_adapter.py b/codegenome/deterministic_adapter.py
index 8773a3d1..1edb1e76 100644
--- a/codegenome/deterministic_adapter.py
+++ b/codegenome/deterministic_adapter.py
@@ -56,7 +56,11 @@ def compute_identity(
         address = f"cg:{signature_hash}"
 
         content = get_git_content(
-            file_path, start_line, end_line, self.repo_path, ref=repo_ref,
+            file_path,
+            start_line,
+            end_line,
+            self.repo_path,
+            ref=repo_ref,
         )
         if content is None or start_line < 1 or end_line < start_line:
             content_hash: str | None = None
diff --git a/consent.py b/consent.py
index 9e5f5494..2814de00 100644
--- a/consent.py
+++ b/consent.py
@@ -27,9 +27,10 @@
 import logging
 import os
 import sys
-from datetime import datetime, timezone
+from collections.abc import Callable
+from datetime import UTC, datetime
 from pathlib import Path
-from typing import Any, Callable
+from typing import Any
 
 logger = logging.getLogger(__name__)
 
@@ -70,7 +71,7 @@ def write_consent(telemetry: bool, *, via: str) -> None:
     record: dict[str, Any] = {
         "telemetry": "enabled" if telemetry else "disabled",
         "policy_version": POLICY_VERSION,
-        "acknowledged_at": datetime.now(timezone.utc).isoformat(),
+        "acknowledged_at": datetime.now(UTC).isoformat(),
         "acknowledged_via": via,
     }
     _CONSENT_FILE.parent.mkdir(parents=True, exist_ok=True)
diff --git a/context.py b/context.py
index e2a84fef..1d65f8a6 100644
--- a/context.py
+++ b/context.py
@@ -36,6 +36,7 @@ def _read_guided_mode(repo_path: str) -> bool:
         return False
     try:
         import yaml
+
         config = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
         return bool(config.get("guided", False))
     except Exception:
@@ -93,7 +94,11 @@ def from_env(cls) -> BicameralContext:
         from adapters.code_locator import get_code_locator
         from adapters.codegenome import get_codegenome
         from adapters.ledger import get_drift_analyzer, get_ledger
-        from code_locator_runtime import detect_authoritative_ref, get_repo_index_state, resolve_ref_sha
+        from code_locator_runtime import (
+            detect_authoritative_ref,
+            get_repo_index_state,
+            resolve_ref_sha,
+        )
         from codegenome.config import CodeGenomeConfig
 
         repo_path = os.getenv("REPO_PATH", ".")
diff --git a/contracts.py b/contracts.py
index dadc8d56..c7e7b1c4 100644
--- a/contracts.py
+++ b/contracts.py
@@ -18,7 +18,6 @@
 
 from pydantic import BaseModel, ConfigDict
 
-
 # ── Skill telemetry diagnostic models ────────────────────────────────
 # One model per skill. extra="forbid" means the handler can detect and
 # echo back any field names the LLM sent that don't belong here.
@@ -85,13 +84,14 @@ class SyncMetrics(BaseModel):
     be ``None`` if that path did not run in the handler — e.g. ledger was
     already synced, or the handler did not take the write barrier.
     """
+
     sync_catchup_ms: float | None = None
     barrier_held_ms: float | None = None
 
 
-
 class CodeRegionSummary(BaseModel):
     """Lean code region for MCP responses — no pipeline metadata."""
+
     file_path: str
     symbol: str
     lines: tuple[int, int]  # (start_line, end_line)
@@ -116,13 +116,15 @@ class DecisionStatusEntry(BaseModel):
     decision_id: str
     description: str
     status: Literal["reflected", "drifted", "pending", "ungrounded"]
-    signoff_state: str | None = None  # proposed | ratified | rejected | collision_pending | context_pending | superseded
-    source_type: str                  # transcript | notion | document | manual | implementation_choice
-    source_ref: str                   # meeting ID, Notion page ID, etc.
-    ingested_at: str                  # ISO datetime
+    signoff_state: str | None = (
+        None  # proposed | ratified | rejected | collision_pending | context_pending | superseded
+    )
+    source_type: str  # transcript | notion | document | manual | implementation_choice
+    source_ref: str  # meeting ID, Notion page ID, etc.
+    ingested_at: str  # ISO datetime
     code_regions: list[CodeRegionSummary]
-    drift_evidence: str = ""          # populated when status = "drifted"
-    blast_radius: list[str] = []      # symbol names of structural dependents (1-hop)
+    drift_evidence: str = ""  # populated when status = "drifted"
+    blast_radius: list[str] = []  # symbol names of structural dependents (1-hop)
     source_excerpt: str = ""
     meeting_date: str = ""
     speakers: list[str] = []
@@ -130,9 +132,9 @@ class DecisionStatusEntry(BaseModel):
 
 
 class DecisionStatusResponse(BaseModel):
-    ref: str                          # git ref evaluated against
-    as_of: str                        # ISO datetime of evaluation
-    summary: dict[str, int]           # {"reflected": N, "drifted": N, ...}
+    ref: str  # git ref evaluated against
+    as_of: str  # ISO datetime of evaluation
+    summary: dict[str, int]  # {"reflected": N, "drifted": N, ...}
     decisions: list[DecisionStatusEntry]
 
 
@@ -141,10 +143,12 @@ class DecisionStatusResponse(BaseModel):
 
 class DecisionMatch(BaseModel):
     decision_id: str
-    description: str                  # the original decision text
+    description: str  # the original decision text
     status: Literal["reflected", "drifted", "pending", "ungrounded"]
-    signoff_state: str | None = None  # proposed | ratified | rejected | collision_pending | context_pending | superseded
-    confidence: float                 # BM25 match score (0–1)
+    signoff_state: str | None = (
+        None  # proposed | ratified | rejected | collision_pending | context_pending | superseded
+    )
+    confidence: float  # BM25 match score (0–1)
     source_ref: str
     code_regions: list[CodeRegionSummary]
     drift_evidence: str = ""
@@ -164,17 +168,19 @@ class ComplianceVerdict(BaseModel):
                          this decision. Server will prune the binds_to edge
                          and record compliance_check with pruned=true.
     """
+
     decision_id: str
     region_id: str
-    content_hash: str            # echoed from PendingComplianceCheck.content_hash
+    content_hash: str  # echoed from PendingComplianceCheck.content_hash
     verdict: Literal["compliant", "drifted", "not_relevant"]
     confidence: Literal["high", "medium", "low"]
-    explanation: str             # one-sentence rationale for audit trail
+    explanation: str  # one-sentence rationale for audit trail
     phase_metadata: dict = {}
 
 
 class ResolveComplianceRejection(BaseModel):
     """Structured rejection for a verdict that failed input validation."""
+
     decision_id: str
     region_id: str
     reason: Literal[
@@ -200,6 +206,7 @@ class ResolveComplianceResponse(BaseModel):
     pruned=true). Holistic status is projected via project_decision_status
     after all verdicts in the batch are written.
     """
+
     phase: Literal["ingest", "drift", "regrounding", "supersession", "divergence"]
     accepted: list[ResolveComplianceAccepted] = []
     rejected: list[ResolveComplianceRejection] = []
@@ -210,19 +217,21 @@ class PendingComplianceCheck(BaseModel):
 
     v0.5.0: decision_id replaces intent_id.
     """
+
     phase: Literal["ingest", "drift", "regrounding"]
     decision_id: str
     region_id: str
     decision_description: str
     file_path: str
     symbol: str
-    content_hash: str                   # key the verdict must be written against
-    code_body: str = ""                 # extracted via tree-sitter, capped
-    old_code_body: str | None = None    # drift-phase only
+    content_hash: str  # key the verdict must be written against
+    code_body: str = ""  # extracted via tree-sitter, capped
+    old_code_body: str | None = None  # drift-phase only
 
 
 class LinkCommitResponse(BaseModel):
     """Returned by /link_commit and embedded in /search_decisions + /detect_drift."""
+
     commit_hash: str
     synced: bool
     reason: Literal["new_commit", "already_synced", "no_changes"]
@@ -246,6 +255,7 @@ class LinkCommitResponse(BaseModel):
 
 class ActionHint(BaseModel):
     """Tester-mode directive appended to search/brief responses."""
+
     kind: Literal[
         "answer_open_questions",
         "review_drift",
@@ -262,7 +272,7 @@ class SearchDecisionsResponse(BaseModel):
     sync_status: LinkCommitResponse
     matches: list[DecisionMatch]
     ungrounded_count: int
-    suggested_review: list[str]      # decision_ids of drifted/pending to review first
+    suggested_review: list[str]  # decision_ids of drifted/pending to review first
     action_hints: list[ActionHint] = []
     sync_metrics: SyncMetrics | None = None  # V1 A3 — catch-up / barrier wall times
 
@@ -274,7 +284,9 @@ class DriftEntry(BaseModel):
     decision_id: str
     description: str
     status: Literal["reflected", "drifted", "pending", "ungrounded"]
-    signoff_state: str | None = None  # proposed | ratified | rejected | collision_pending | context_pending | superseded
+    signoff_state: str | None = (
+        None  # proposed | ratified | rejected | collision_pending | context_pending | superseded
+    )
     symbol: str
     lines: tuple[int, int]
     drift_evidence: str = ""
@@ -306,6 +318,7 @@ class ScanBranchResponse(BaseModel):
 
     Decisions are deduped by decision_id across the full set of changed files.
     """
+
     base_ref: str
     head_ref: str
     sweep_scope: Literal["head_only", "range_diff", "range_truncated", "branch_delta"]
@@ -337,8 +350,8 @@ class DoctorLedgerSummary(BaseModel):
 
 class DoctorResponse(BaseModel):
     scope: Literal["file", "branch", "empty"]
-    file_scan: "DetectDriftResponse | None" = None
-    branch_scan: "ScanBranchResponse | None" = None
+    file_scan: DetectDriftResponse | None = None
+    branch_scan: ScanBranchResponse | None = None
     ledger_summary: DoctorLedgerSummary | None = None
     action_hints: list[ActionHint] = []
 
@@ -348,8 +361,11 @@ class DoctorResponse(BaseModel):
 
 class IngestSpan(BaseModel):
     """Source excerpt from a meeting, document, or manual input."""
+
     text: str = ""
-    source_type: str = "manual"       # transcript | notion | document | manual | agent_session | implementation_choice
+    source_type: str = (
+        "manual"  # transcript | notion | document | manual | agent_session | implementation_choice
+    )
     source_ref: str = ""
     speakers: list[str] = []
     meeting_date: str = ""
@@ -357,6 +373,7 @@ class IngestSpan(BaseModel):
 
 class IngestCodeRegion(BaseModel):
     """Pre-resolved code region for a mapping."""
+
     symbol: str
     file_path: str
     start_line: int = 0
@@ -367,13 +384,14 @@ class IngestCodeRegion(BaseModel):
 
 class IngestMapping(BaseModel):
     """One decision-to-code mapping in the internal pipeline format."""
+
     intent: str
     span: IngestSpan = IngestSpan()
     symbols: list[str] = []
     code_regions: list[IngestCodeRegion] = []
     signoff: dict | None = None
     feature_group: str | None = None
-    decision_level: str | None = None    # L1 | L2 | L3
+    decision_level: str | None = None  # L1 | L2 | L3
     parent_decision_id: str | None = None
 
 
@@ -389,6 +407,7 @@ class IngestDecision(BaseModel):
     decisions are extracted from source, not inferred. Empty excerpts
     are rejected with a clear error.
     """
+
     id: str = ""
     title: str = ""
     description: str = ""
@@ -409,11 +428,12 @@ class IngestActionItem(BaseModel):
 
 class IngestPayload(BaseModel):
     """Ingest input — accepts EITHER mappings (internal) or decisions (natural LLM)."""
+
     repo: str = ""
     commit_hash: str = ""
     query: str = ""
     mappings: list[IngestMapping] = []
-    source: str = "manual"       # transcript | notion | slack | document | manual | agent_session | implementation_choice
+    source: str = "manual"  # transcript | notion | slack | document | manual | agent_session | implementation_choice
     title: str = ""
     date: str = ""
     participants: list[str] = []
@@ -443,7 +463,8 @@ class ContextForCandidate(BaseModel):
     a decision with signoff.state='context_pending' that overlaps with the
     ingested span. Human confirms or rejects via bicameral.resolve_collision.
     """
-    span_id: str           # input_span record ID (e.g. 'input_span:abc123')
+
+    span_id: str  # input_span record ID (e.g. 'input_span:abc123')
     decision_id: str
     decision_description: str
     overlap_score: float = 0.0  # rank-position score; raw BM25 score is always 0 in v2 embedded
@@ -455,9 +476,10 @@ class CreatedDecision(BaseModel):
     Returned in IngestResponse.created_decisions so the caller-LLM can
     cross-reference against bicameral.history without fuzzy text matching.
     """
+
     decision_id: str
     description: str
-    decision_level: str | None = None   # L1 | L2 | L3
+    decision_level: str | None = None  # L1 | L2 | L3
 
 
 class IngestResponse(BaseModel):
@@ -468,10 +490,10 @@ class IngestResponse(BaseModel):
     stats: IngestStats
     created_decisions: list[CreatedDecision] = []
     pending_grounding_decisions: list[dict] = []
-    context_for_candidates: "list[ContextForCandidate]" = []
+    context_for_candidates: list[ContextForCandidate] = []
     source_cursor: SourceCursorSummary | None = None
-    judgment_payload: "GapJudgmentPayload | None" = None   # kept for backward compat
-    judgment_payloads: "list[GapJudgmentPayload]" = []     # one per feature_group topic
+    judgment_payload: GapJudgmentPayload | None = None  # kept for backward compat
+    judgment_payloads: list[GapJudgmentPayload] = []  # one per feature_group topic
     sync_status: LinkCommitResponse | None = None
 
 
@@ -479,7 +501,9 @@ class BriefDecision(BaseModel):
     decision_id: str
     description: str
     status: Literal["reflected", "drifted", "pending", "ungrounded"]
-    signoff_state: str | None = None  # proposed | ratified | rejected | collision_pending | context_pending | superseded
+    signoff_state: str | None = (
+        None  # proposed | ratified | rejected | collision_pending | context_pending | superseded
+    )
     source_type: str = ""
     source_ref: str = ""
     code_regions: list[CodeRegionSummary] = []
@@ -488,7 +512,7 @@ class BriefDecision(BaseModel):
     source_excerpt: str = ""
     meeting_date: str = ""
     signoff: dict | None = None
-    decision_level: str | None = None   # L1 | L2 | L3 — CodeGenome claim/identity split
+    decision_level: str | None = None  # L1 | L2 | L3 — CodeGenome claim/identity split
     parent_decision_id: str | None = None  # L2 → L1 parent link for evidence inheritance
 
 
@@ -549,8 +573,8 @@ class PreflightResponse(BaseModel):
     action_hints: list[ActionHint] = []
     sources_chained: list[str] = []
     # v0.8.0 HITL annotations (topic-independent, ledger health)
-    unresolved_collisions: list[BriefDecision] = []   # collision_pending from prior sessions
-    context_pending_ready: list[BriefDecision] = []   # context_pending with ≥1 confirmed context_for
+    unresolved_collisions: list[BriefDecision] = []  # collision_pending from prior sessions
+    context_pending_ready: list[BriefDecision] = []  # context_pending with ≥1 confirmed context_for
     sync_metrics: SyncMetrics | None = None  # V1 A3 — catch-up wall times
     product_stage: str | None = None  # shown once per device; wait-time expectation-setting
 
@@ -612,8 +636,9 @@ class RatifyResponse(BaseModel):
     Idempotent: calling ratify on an already-signed-off decision returns
     was_new=False and leaves the existing signoff record untouched.
     """
+
     decision_id: str
-    was_new: bool         # True if this call set the signoff; False if already set
+    was_new: bool  # True if this call set the signoff; False if already set
     signoff: dict
     projected_status: Literal["reflected", "drifted", "pending", "ungrounded"]
 
@@ -628,15 +653,16 @@ class ResolveCollisionResponse(BaseModel):
       - collision: new_id + old_id + action ('supersede'|'keep_both')
       - context_for: span_id + decision_id + confirmed (bool)
     """
+
     mode: Literal["collision", "context_for"]
     action_taken: str
-    new_decision_id: str = ""   # collision mode
-    old_decision_id: str = ""   # collision mode
-    span_id: str = ""           # context_for mode
-    decision_id: str = ""       # context_for mode
+    new_decision_id: str = ""  # collision mode
+    old_decision_id: str = ""  # collision mode
+    span_id: str = ""  # context_for mode
+    decision_id: str = ""  # context_for mode
     edge_written: bool = False
-    new_status: str = ""        # projected status of new decision after action
-    old_status: str = ""        # projected status of old decision (supersede only)
+    new_status: str = ""  # projected status of new decision after action
+    old_status: str = ""  # projected status of old decision (supersede only)
 
 
 # ── Tool: bicameral.history ──────────────────────────────────────────────────
@@ -644,45 +670,51 @@ class ResolveCollisionResponse(BaseModel):
 
 class HistorySource(BaseModel):
     """One input span that originated or updated a decision."""
-    source_ref: str               # e.g. "sprint-14-planning"
+
+    source_ref: str  # e.g. "sprint-14-planning"
     source_type: Literal["transcript", "slack", "document", "agent_session", "manual"]
-    date: str                     # ISO date
+    date: str  # ISO date
     speaker: str | None = None
-    quote: str                    # verbatim excerpt from source_span.text
+    quote: str  # verbatim excerpt from source_span.text
 
 
 class HistoryFulfillment(BaseModel):
     """Code grounding for a decision."""
+
     file_path: str
     symbol: str | None = None
     start_line: int
     end_line: int
     git_url: str | None = None
-    grounded_at_ref: str = ""     # git ref when first grounded
+    grounded_at_ref: str = ""  # git ref when first grounded
     baseline_hash: str | None = None
     current_hash: str | None = None
 
 
 class HistoryDecision(BaseModel):
     """Balance-sheet view of one decision: commitment + fulfillment + balance."""
-    id: str                       # decision_id
-    summary: str                  # canonical decision text
+
+    id: str  # decision_id
+    summary: str  # canonical decision text
     featureId: str
     status: Literal["reflected", "drifted", "pending", "ungrounded"]
-    signoff_state: str | None = None  # proposed | ratified | rejected | collision_pending | context_pending | superseded
-    sources: list[HistorySource] = []   # 1+ input spans; empty for AI-discovered
-    fulfillments: list[HistoryFulfillment] = []   # all bound code regions
-    drift_evidence: str | None = None    # human-readable delta when drifted
-    signoff: dict | None = None          # ratification record: state, signer, ratified_at
-    decision_level: str | None = None   # L1 | L2 | L3 — for balance-sheet display
+    signoff_state: str | None = (
+        None  # proposed | ratified | rejected | collision_pending | context_pending | superseded
+    )
+    sources: list[HistorySource] = []  # 1+ input spans; empty for AI-discovered
+    fulfillments: list[HistoryFulfillment] = []  # all bound code regions
+    drift_evidence: str | None = None  # human-readable delta when drifted
+    signoff: dict | None = None  # ratification record: state, signer, ratified_at
+    decision_level: str | None = None  # L1 | L2 | L3 — for balance-sheet display
     parent_decision_id: str | None = None
-    ephemeral: bool = False             # True when current status was determined by a feature-branch commit not yet in authoritative ref
+    ephemeral: bool = False  # True when current status was determined by a feature-branch commit not yet in authoritative ref
 
 
 class HistoryFeature(BaseModel):
     """A feature group containing related decisions."""
-    id: str                       # feature group id (slugified name)
-    name: str                     # canonical feature_group noun phrase
+
+    id: str  # feature group id (slugified name)
+    name: str  # canonical feature_group noun phrase
     decisions: list[HistoryDecision]
 
 
@@ -690,7 +722,7 @@ class HistoryResponse(BaseModel):
     features: list[HistoryFeature]
     truncated: bool = False
     total_features: int = 0
-    as_of: str = ""               # git ref evaluated against
+    as_of: str = ""  # git ref evaluated against
     sync_metrics: SyncMetrics | None = None  # V1 A3 — catch-up wall times
 
 
@@ -699,7 +731,8 @@ class HistoryResponse(BaseModel):
 
 class DashboardResponse(BaseModel):
     """Response from bicameral.dashboard."""
-    url: str                       # http://localhost:{port}
+
+    url: str  # http://localhost:{port}
     status: Literal["started", "already_running"]
     port: int
 
@@ -709,6 +742,7 @@ class DashboardResponse(BaseModel):
 
 class BindResult(BaseModel):
     """Result for one binding in a bicameral.bind call."""
+
     decision_id: str
     region_id: str
     content_hash: str
@@ -718,6 +752,7 @@ class BindResult(BaseModel):
 
 class BindResponse(BaseModel):
     """Response envelope for bicameral.bind."""
+
     bindings: list[BindResult]
     sync_metrics: SyncMetrics | None = None  # V1 A3 — write-barrier hold time
 
@@ -727,6 +762,7 @@ class BindResponse(BaseModel):
 
 class SessionStartBanner(BaseModel):
     """Open-decision summary shown once per session at session start."""
+
     drifted_count: int = 0
     ungrounded_count: int = 0
     proposal_count: int = 0
diff --git a/dashboard/server.py b/dashboard/server.py
index 1d231d2b..90306ca3 100644
--- a/dashboard/server.py
+++ b/dashboard/server.py
@@ -17,7 +17,6 @@
 import asyncio
 import json
 import logging
-import os
 import socket
 from pathlib import Path
 from typing import Any
@@ -100,11 +99,13 @@ async def stop(self) -> None:
     async def notify(self, ctx: Any) -> None:
         """Build a fresh HistoryResponse and push it to all SSE clients."""
         from dashboard.sse import get_broadcaster
+
         broadcaster = get_broadcaster()
         if broadcaster.subscriber_count == 0:
             return
         try:
             from handlers.history import handle_history
+
             response = await handle_history(ctx)
             payload = json.dumps(response.model_dump(), default=str)
             await broadcaster.broadcast(payload)
@@ -162,6 +163,7 @@ async def _serve_history(self, writer: asyncio.StreamWriter) -> None:
         try:
             ctx = self._ctx_factory()
             from handlers.history import handle_history
+
             response = await handle_history(ctx)
             body = json.dumps(response.model_dump(), default=str).encode()
         except Exception as exc:
@@ -171,6 +173,7 @@ async def _serve_history(self, writer: asyncio.StreamWriter) -> None:
 
     async def _serve_sse(self, writer: asyncio.StreamWriter) -> None:
         from dashboard.sse import get_broadcaster
+
         broadcaster = get_broadcaster()
         writer.write(_HTTP_200_SSE.encode())
         await writer.drain()
@@ -179,6 +182,7 @@ async def _serve_sse(self, writer: asyncio.StreamWriter) -> None:
         try:
             ctx = self._ctx_factory()
             from handlers.history import handle_history
+
             response = await handle_history(ctx)
             initial = json.dumps(response.model_dump(), default=str)
             writer.write(f"data: {initial}\n\n".encode())
@@ -191,7 +195,7 @@ async def _serve_sse(self, writer: asyncio.StreamWriter) -> None:
             while True:
                 try:
                     data = await asyncio.wait_for(q.get(), timeout=30.0)
-                except asyncio.TimeoutError:
+                except TimeoutError:
                     # Keep connection alive with an SSE comment; loop and keep waiting.
                     writer.write(b": keepalive\n\n")
                     await writer.drain()
diff --git a/docs/DEV_CYCLE.md b/docs/DEV_CYCLE.md
new file mode 100644
index 00000000..3ece53fe
--- /dev/null
+++ b/docs/DEV_CYCLE.md
@@ -0,0 +1,1177 @@
+# Development Cycle
+
+**Audience**: contributors, release managers (Jin), and anyone shipping a change
+to `BicameralAI/bicameral-mcp`. This document is the contract — if you are about
+to open a branch, write a PR, cut a release, or close an issue, follow what is
+written here. Deviations require a META_LEDGER entry explaining why.
+
+**Repo topology** (as of v0.13.0, post-Phase-4):
+
+```text
+contributor fork (e.g. Knapp-Kevin/bicameral-mcp)
+         │  feature branches live here
+         ▼
+BicameralAI/bicameral-mcp
+   ├── dev      ← integration branch; CI green, code complete, NOT shipped
+   └── main     ← shipped; tagged; users pull from here
+```
+
+Two branches, one direction of flow: **feature → dev → main**. Nothing else
+merges to `main` except `dev` (and the rare hotfix — see §10).
+
+---
+
+## 0. Workflow Feature Release Cycle
+
+**Audience**: anyone proposing a new agentic workflow capability — a new
+skill, a new lifecycle hook, a new auto-fire trigger, a new dashboard
+surface. Distinct from §6 (engineering version release): §6 covers how a
+finished change reaches users; **§0 covers how a workflow idea becomes a
+finished change worth releasing.**
+
+**Why this exists separately**: most of our P0 misses (#146 preflight
+auto-fire, #147 SessionEnd capture-corrections, the e2e harness churn
+across 2026-04 → 2026-05) trace back to the same root cause — we shipped
+the implementation BEFORE we wrote down what success looks like and
+BEFORE we had any way to observe whether it actually worked in the wild.
+The fix is to put validation in front of implementation, not behind it.
+
+### The cycle
+
+```text
+┌──────────┐  ┌──────────┐  ┌──────────┐  ┌──────────┐  ┌──────────┐  ┌──────────┐
+│  1.      │  │  2.      │  │  3.      │  │  4.      │  │  5.      │  │  6.      │
+│ Friction │─▶│Candidate │─▶│  Test    │─▶│Functional│─▶│Telemetry │─▶│Optimized │
+│ capture  │  │ workflow │  │ harness  │  │ solution │  │collection│  │ solution │
+└──────────┘  └──────────┘  └──────────┘  └──────────┘  └──────────┘  └──────────┘
+                                  ▲                            │
+                                  │   ◀─── feedback loop ──────┘
+                                  │   (telemetry surfaces gaps the harness should have caught)
+```
+
+**Anti-pattern (the trap we keep falling into)**: jump from step 1
+directly to step 4. Build the skill. Ship it. Discover the harness can't
+observe the auto-fire and telemetry surfaces nothing. Now you're
+retrofitting phases 2/3/5 onto a thing already in production — every
+iteration loses fidelity because the spec and the implementation are
+entangled. (See: every revision history of `tests/e2e/run_e2e_flows.py`.)
+
+### Phases
+
+#### 1. Friction capture
+
+Observable evidence that a real user / agent / contributor stubbed their
+toe on something that should "just work." Symptoms, not fixes.
+
+Examples:
+- Slack thread from a design partner showing `claude -p '/bicameral:sync'`
+  exiting silently (#124).
+- Dashboard footage of a mid-session constraint orphaning as a parallel
+  decision instead of linking to its parent.
+- An e2e harness flow that fails for a reason no one can immediately
+  explain.
+
+Captured as a GitHub issue with `friction` or `desync:*` label, in the
+repo where the friction was observed. Body answers: *what was the user
+trying to do, what happened instead, what would "right" look like.*
+
+**Out of scope at this stage**: solution shape, file paths, schema
+changes. Don't pre-commit to an implementation in the friction note.
+
+#### 2. Candidate workflow
+
+A short prose spec of what the new workflow should look like end-to-end,
+written from the user/agent perspective, NOT the implementation
+perspective. Lives in a source-of-truth issue (e.g.
+`BicameralAI/bicameral#108` for the v0 user flow spec).
+
+Format:
+- **Trigger**: what does the user do or say to enter this workflow?
+- **Sequence**: numbered list of agent-observable steps — tool calls,
+  hook fires, status transitions. Reference the spec; do NOT inline
+  implementation details (file paths, function names, schema columns).
+- **Success outcome**: what visible state proves the workflow worked?
+  Status flip, ledger row, dashboard panel, ratification record.
+- **Failure modes**: what should the user see when each step fails, and
+  what's the recovery path?
+
+The spec is the contract for phases 3–6. If the spec is wrong, the
+harness validates the wrong thing and the implementation chases the
+wrong target.
+
+#### 3. Test harness
+
+A real e2e test that exercises the spec from step 2 against a real
+claude session (not mocks). For bicameral-mcp this lives at
+`tests/e2e/run_e2e_flows.py`.
+
+**Required before any implementation work begins.** The harness fails on
+day one — that's the point. A failing harness with a clear assertion
+message is the spec made executable.
+
+Harness rules:
+- Assert on the spec's success outcome, not the implementation path.
+  ("After commit, decision X is in `pending` state" is good. "Agent
+  called `link_commit` then `resolve_compliance` in that order" is
+  brittle and couples the test to the substrate.)
+- Use natural prompts — never name the tool the agent is supposed to
+  auto-fire. Naming the tool defeats the trigger that IS the product.
+- When success isn't observable in stream-json (e.g. a SessionEnd
+  subprocess writes to the ledger out-of-band), validate via post-hoc
+  ledger query. Document the indirection in the asserter docstring.
+- When a flow fails: distinguish test-harness bug from product gap. If
+  the asserter is wrong about the spec, fix the asserter (no GitHub
+  issue needed). If the spec says X happens and X doesn't happen, that's
+  a product gap — open or update an issue, leave the harness asserting
+  the spec, mark the failure as expected until the implementation lands.
+
+#### 4. Functional solution
+
+Implementation pass that makes the harness pass. Optimize for spec
+correctness — not performance, not polish. Skill description, tool
+contract, lifecycle hooks all in scope.
+
+Done when:
+- Harness PASSes against the unmodified natural prompt from step 3.
+- A real user can complete the flow end-to-end without hitting any of
+  the friction from step 1.
+- Implementation is documented at the level needed for phase 5 telemetry
+  to know what to count.
+
+#### 5. Telemetry collection
+
+Instrument the new workflow with PostHog events /
+`bicameral.skill_begin/end` calls / structured logs that answer: *is
+this actually being used, by whom, and does it work in their hands?*
+
+Telemetry contract is part of the spec, not an afterthought. Each step
+in the candidate workflow (phase 2) should map to a telemetry event the
+dashboard can query.
+
+Wire telemetry BEFORE merging the implementation PR. A workflow you
+can't observe in production is a workflow that's never validated in
+production.
+
+#### 6. Optimized solution
+
+Iterate based on what telemetry shows:
+- Drop-off after step N → step N is unclear or broken in real
+  conditions. Could be a description fix or a substrate change.
+- Auto-fire rate below X% → trigger discipline is losing the priority race;
+  restate the skill description, change the trigger phrasing, or move to
+  a deterministic hook.
+- Compliance verdict mix unexpected → either the rubric is wrong or the
+  user is using the workflow differently than the spec assumed.
+
+Optimization changes route through the same cycle: telemetry-observed
+friction → updated workflow spec → updated harness → new functional
+pass → new telemetry. Don't optimize without re-passing the harness.
+
+### Audit trail
+
+Every workflow feature gets a short META_LEDGER entry at each phase
+boundary:
+
+```text
+2026-05-01  workflow:bicameral-capture-corrections  phase=3→4
+  harness PR: BicameralAI/bicameral-mcp#147 (SKIP→SETUP)
+  spec: BicameralAI/bicameral#108 § Flow 4
+  next: implementation PR + telemetry wiring
+```
+
+This makes it possible to look at any open workflow feature and
+immediately see which phase it's in, what's blocking the next phase, and
+where the spec lives. It's also the first place to look when a feature
+ships and silently regresses — phase boundaries are where the harness
+should pass before/after the change.
+
+---
+
+## 1. Lifecycle map
+
+```text
+┌──────────┐   ┌────────┐   ┌──────────┐   ┌─────┐   ┌─────────────┐   ┌──────┐   ┌────────┐
+│  Issue   │──▶│ Branch │──▶│ Feature  │──▶│ dev │──▶│  Release PR │──▶│ main │──▶│  Tag   │
+│ (#nnn)   │   │ named  │   │   PR     │   │     │   │  (dev→main) │   │      │   │ vX.Y.Z │
+│          │   │ /<n>-x │   │ → dev    │   │     │   │             │   │      │   │        │
+└──────────┘   └────────┘   └──────────┘   └─────┘   └─────────────┘   └──────┘   └────────┘
+     │              │            │           │              │             │           │
+     │              │       Closes #nnn      │              │             │      GitHub
+     │              │       on squash        │       Bumps version,       │      Release
+     │              │            │           │       CHANGELOG flip,      │      published
+     │              │            ▼           │       milestone close      │           │
+     │              │      CI must pass      │              │             │           ▼
+     │              │      QOR seal in       │              ▼             │    Help/training
+     │              │      META_LEDGER       │      Squash-merge          │    docs published
+     │              │                        │      OR merge commit       │
+     ▼              ▼                        ▼                            ▼
+ Milestone:    Branch name:                Issue auto-closed,    User-facing release;
+ vX.Y.Z        <issue#>-<short-slug>       milestone open        upstream consumers
+                                           ("pending release")    pull from main
+```
+
+**One rule of thumb**: any work that touches user-visible behavior must traverse
+every box in that diagram. No back-doors to `main`.
+
+---
+
+## 2. Issues
+
+### 2.1 Creating
+
+- **Title**: imperative, scoped. `feat(codegenome): semantic drift evaluation in resolve_compliance`,
+  not "add drift evaluation". **Do not** prefix with `[P0]`/`[P1]`/`[P2]` — use
+  the priority labels in §2.1.1 instead.
+- **Required labels** (apply at least one label from each mandatory axis):
+  - **Type** (mandatory): `feat`, `fix`, `docs`, `chore`, `test`, `refactor`, `perf`, `security`.
+  - **Surface** (mandatory): `tool`, `skill`, `ledger`, `code-locator`, `codegenome`, `infra`, `docs-only`.
+  - **Priority** (mandatory after triage): see §2.1.1 below.
+  - **State** (optional): see §2.1.2 below.
+- **Milestone**: attach to the next-up release (`v0.14.0`). If you don't know
+  which release it lands in, attach to `vNext-triage` and let Jin re-assign.
+- **Body template** (see `.github/ISSUE_TEMPLATE/`):
+  - **Why**: one paragraph. The product decision this serves.
+  - **What**: the smallest change that satisfies "Why".
+  - **Out of scope**: explicit exclusions. Stops scope creep at PR-review time.
+  - **Acceptance**: bullet list of testable conditions. CI green is implied; add
+    behavioural checks ("`link_commit` returns `auto_resolved_count` ≥ 0").
+
+> **Risk** (`risk:L1` / `risk:L2` / `risk:L3`) lives on **PRs**, not issues —
+> see §4.4. Risk is a property of the change being made, knowable only after
+> design. Issues carry priority (urgency); PRs carry risk (review tier).
+
+#### 2.1.1 Priority labels (one per issue, mandatory after triage)
+
+Exactly one priority label per triaged issue. Untriaged issues carry `triage`
+(see §2.1.2) until a maintainer assigns priority.
+
+| Label | Color | Meaning |
+|---|---|---|
+| `P0` | red | Critical — drop everything. Production down, data loss, security regression, ledger corruption. **Triggers an immediate response, even off-hours.** |
+| `P1` | orange | High — ship this milestone. User-impacting bug or committed feature with a deadline. |
+| `P2` | yellow | Medium — next milestone or two. The default for routine new feature work and non-urgent bugs. |
+| `P3` | grey | Low — eventually. Nice-to-have, polish, non-load-bearing improvements. |
+
+**Calibration heuristics**:
+
+- *"If this stays open for the next two months, will any user be unhappy?"*
+  → No: `P3`. Yes: at least `P2`.
+- *"Is there a workaround that's acceptable for the next milestone?"*
+  → Yes: `P2` or lower. No: at least `P1`.
+- *"Is anyone losing data, money, or trust right now?"*
+  → Yes: `P0`. No: not `P0`.
+
+**P0 is rare.** If we have more than two open `P0` issues at any time, something
+is wrong with our triage discipline — `P0` should mean *"the team stops other
+work"*. Promoting too many issues to `P0` dilutes the signal.
+
+#### 2.1.2 State labels (optional, orthogonal to priority)
+
+| Label | Color | Meaning |
+|---|---|---|
+| `triage` | light grey | Needs assessment; no priority assigned yet. Default for newly-filed issues. |
+| `blocked` | dark grey | Temporarily blocked by another issue or external dependency. Always include a comment naming the blocker. |
+| `parked` | purple | Known issue, deferred indefinitely (external blocker, strategic pause, cost > benefit at current scale). Not abandoned, but not on a roadmap. **Only maintainers apply `parked`.** |
+
+State labels are mostly orthogonal to priority — with one exception:
+
+- **`triage` and `blocked` coexist with priority.** A `P1 + blocked` issue is
+  high-priority work waiting on a dependency; a `triage` issue gets a priority
+  label as soon as a maintainer assesses it.
+- **`parked` supersedes priority.** Don't apply both. A parked issue is, by
+  definition, not on the priority axis — it's deferred indefinitely. Adding
+  `P3` to a `parked` issue is redundant and clutters the label list. If a
+  parked issue ever becomes actionable, drop `parked` and assign a real
+  priority at that moment.
+
+**Never close a `parked` issue** — keep it open as a known-deferred record
+so future filers find it.
+
+The existing `merged-to-dev` label (post-merge status, not pre-merge state)
+remains separate from this axis. See §4.1.1 for how it differs from the
+`flow:` labels and §4.5.2 for when it is removed.
+
+### 2.2 Closure
+
+`Closes #X` in a PR body **fires when that PR's HEAD merges into its BASE**, not
+when work reaches `main`. PRs target `dev`, so issues close at the dev-merge.
+
+Why we keep auto-close on dev: closure tracks "the work is in code", milestones
+track "the work is shipped". Two signals, two artifacts.
+
+### 2.3 Reopening
+
+If a hotfix or follow-up reveals the dev work was wrong, **reopen the original
+issue** rather than filing a new one — keeps history threaded. Add a comment
+linking the regression's hotfix PR.
+
+---
+
+## 3. Branches
+
+### 3.1 Naming
+
+`<issue#>-<short-slug>` from a fork.
+
+```
+Knapp-Kevin/codegenome-phase-4-qor    ← acceptable (descriptive slug)
+Knapp-Kevin/61-drift-classifier       ← preferred (issue-numbered)
+Knapp-Kevin/main                      ← never push feature work to fork's main
+Knapp-Kevin/dev                       ← does not exist (BicameralAI/dev is canonical)
+```
+
+A fork's `dev` branch is **not** maintained. The integration branch is exactly
+one place: `BicameralAI/dev`.
+
+### 3.2 Branching off
+
+Always branch off `BicameralAI/dev`, never `main`. `dev` is what other in-flight
+work has integrated against; `main` is a moving snapshot of the last release.
+
+```bash
+git fetch BicameralAI dev
+git checkout -b 61-drift-classifier BicameralAI/dev
+```
+
+### 3.3 Stacking
+
+Stacked PRs (PR B depends on PR A's branch) are tolerated for short windows
+(< 48 h). Rebase the stack onto `dev` the moment the bottom PR merges. Long
+stacks compound merge-conflict risk and review fatigue.
+
+---
+
+## 4. Pull Requests
+
+### 4.1 Targeting
+
+**All feature/fix PRs target `dev`.** The release PR (and only the release PR)
+targets `main`. CI workflows enforce both: `pull_request: branches: [main, dev]`.
+
+#### 4.1.1 Flow labels (mandatory)
+
+Every PR carries exactly one `flow:` label so contributors and reviewers can
+tell at a glance which lane it's in. The label mirrors the target branch but
+disambiguates the two cases that share `main`:
+
+| Label | Color | Target | Meaning |
+|---|---|---|---|
+| `flow:feature` | green | `dev` | Standard feature/fix going through the integration branch. The default. |
+| `flow:release` | blue | `main` | Periodic `dev → main` release PR opened by the release manager. Carries no new code — only the integrated `dev` HEAD. |
+| `flow:hotfix` | red | `main` | Emergency fix bypassing `dev`. Sets the §10 sync-back-to-dev clock. |
+
+Why labels in addition to the base branch:
+
+- `gh pr list --base main` returns *both* release PRs and hotfix PRs — different
+  processes, different review tiers, different urgencies. The label
+  disambiguates.
+- Filters like `gh pr list --label flow:hotfix --state closed` give a clean
+  audit trail of every emergency bypass over time. We want that visible.
+- Dependabot auto-applies `flow:feature` via `.github/dependabot.yml`; nothing
+  arrives without a flow label.
+
+Reviewers can refuse to review a PR that has no `flow:` label — the contract
+is "label first, review second."
+
+**Distinct from the post-merge `merged-to-dev` label.** That one tracks
+*status* ("this work has landed on dev but not yet on main"). The `flow:`
+labels track *intent* (which lane the PR is in). Both can coexist on a single
+PR after merge if Jin uses `merged-to-dev` to surface his release queue.
+
+### 4.2 Title
+
+`<type>(<surface>): <imperative summary>` — the same shape as the issue title.
+The squash commit message inherits this; loose PR titles produce ugly history.
+
+### 4.3 Body — required sections
+
+```markdown
+## Summary
+1–3 bullets, user-facing outcome.
+
+## Linked issues
+Closes #61
+Refs #60 (depends on continuity matcher landed there)
+
+## Plan / Audit / Seal
+- Plan: docs/Planning/plan-codegenome-phase-4.md (v3, content hash sha256:911171cf…)
+- Audit: META_LEDGER Entry #13, chain hash 21ac210f… — verdict PASS
+- Seal:  META_LEDGER Entry #14, chain hash 0ebcf69b…
+
+## Test plan
+- [ ] `pytest tests/test_codegenome_drift_classifier.py -q` (32/32)
+- [ ] `pytest tests/test_m3_benchmark.py -q` (5/5)
+- [ ] regression: `pytest -q` (189/189)
+```
+
+The Plan/Audit/Seal section is **mandatory for any PR > 100 LOC or risk:L2+**.
+Smaller PRs may use `Plan: trivial; risk:L1`.
+
+### 4.4 Reviewers
+
+- Code-owner from `CODEOWNERS` is auto-requested.
+- **Risk:L3 PRs**: require a second reviewer + a security-pass note in the
+  description.
+- **Risk:L2 PRs**: one reviewer.
+- **Risk:L1 PRs** (typo, comment fixes, dep bumps from Dependabot with green
+  CI): owner self-merge after CI is green.
+
+### 4.5 CI gates
+
+Two-tier model: a fast set on every PR-to-`dev`, a deeper set on the release
+PR (`dev` → `main`). The asymmetry is deliberate — see §4.5.3.
+
+#### 4.5.1 Tier 1 — PR → `dev` (fast, blocks every PR)
+
+The bar is *"this won't break dev for everyone else."* Target wall-clock: under
+5 minutes. Red on any of these blocks merge.
+
+| Gate | Workflow / tool | Why |
+|---|---|---|
+| **Lint** | `ruff` + `black --check` | Catches style drift, dead imports, unused vars before review |
+| **Type check** | `mypy` (or `pyright`) | Without it, type errors surface at runtime at Pydantic boundaries; catch them at PR time instead |
+| **Unit + integration tests (Linux)** | `test-mcp-regression.yml` (existing) | Core regression suite |
+| **Unit + integration tests (Windows)** | matrix on `test-mcp-regression.yml` | Three of the last four bugs (#67, #68, #74) were Windows-only — manual verification is not a strategy |
+| **Schema persistence smoke** | `test-schema-persistence.yml` (existing) | Schema bugs are silent killers; cheap to run |
+| **Module import smoke** | `python -c "import server, telemetry, consent, ..."` | Catches missing modules / circular imports in seconds |
+| **Secret scan** | `gitleaks` or `trufflehog`, fail-on-find | API keys, tokens, credentials in code or test fixtures |
+| **`pip check`** | one-liner job | Detects broken dependency tree on the PR's `pip install -e .[test]` |
+| **`merged-to-dev` label automation** | post-merge GitHub Action | Auto-applies the label on merge; resolves the manual labeling problem from the PR-A audit |
+
+#### 4.5.2 Tier 2 — Release PR (`dev` → `main`)
+
+The bar is *"this is releasable to users."* Inherits all Tier 1 gates plus the
+following. Can run 10–20 minutes; runs less often (one release PR at a time).
+
+| Gate | Workflow / tool | Why |
+|---|---|---|
+| **All Tier 1 gates** | — | Inherits dev's bar |
+| **Full regression including slow markers** | `pytest -m "not bench"` | Tier 1 may exclude `alpha_flow`, `desync_scenarios`; the release run includes them |
+| **Preflight eval — blocking** | `preflight-eval.yml` (currently advisory) | Currently advisory on every PR; should block release if drift precision regresses |
+| **Schema migration validation against persistent DB with seed data** | bespoke job | Beyond the smoke — apply migration on a `v_(N-1)` seed, assert no row loss + roundtrip works |
+| **Performance regression** | bespoke job | Drift detection p50, ingest throughput, search latency. Fail if > 15% regression vs `main`'s last successful run |
+| **Security scan** | `bandit`, `pip-audit`, GitHub Dependency Review | Required before any user touches the binary |
+| **CHANGELOG enforcement** | bespoke job | Reject release PR if `CHANGELOG.md` does not move `## Unreleased` content under a new `## [vX.Y.Z]` block |
+| **Version monotonicity** | bespoke job | Version in `pyproject.toml` must be `>` current `main` tag |
+| **MCP protocol live smoke** | bespoke job | Spawn server, call each tool over stdio, assert response shape. Catches handler-registration / Pydantic-boundary issues unit tests miss |
+| **Issue auto-close on merge** | post-merge action | `Closes #N` fires on merge into the PR's base; on release PR merge to `main`, also strip the `merged-to-dev` label from issues whose fix is now shipped |
+
+#### 4.5.3 Why the split
+
+The asymmetry isn't arbitrary — it's about **failure cost vs velocity**:
+
+| Concern | dev gate | main gate |
+|---|---|---|
+| Style / type errors | Block dev (cheap to fix at PR time) | Inherited |
+| Windows breakage | Block dev (recent bug history mandates) | Inherited |
+| Eval regression | Advisory on dev (don't slow feature work for noise) | **Block main** (release quality) |
+| Performance regression | Don't run (too slow per PR) | **Block main** |
+| CHANGELOG / version | Don't enforce (dev work is in-flight) | **Block main** |
+| Security scan | Don't run per PR (slow, noisy) | **Block main** |
+| MCP protocol live smoke | Don't run (requires server boot) | **Block main** |
+
+#### 4.5.4 Implementation phases (current state vs target)
+
+A dev-cycle gate is only as strong as its branch-protection rule. Adding the
+workflow file is half the job; the other half is requiring it via the GitHub
+"Require status checks to pass before merging" setting on `dev` and `main`.
+
+**Phase 1 — biggest impact, low risk** (open as one chore PR):
+
+1. Add Windows test job to `test-mcp-regression.yml` via a matrix
+   (`strategy.matrix.os: [ubuntu-latest, windows-latest]` with
+   `runs-on: ${{ matrix.os }}`).
+2. Add `lint-and-typecheck.yml` (ruff + mypy) running on all PRs.
+3. Add `secret-scan.yml` (gitleaks) on all PRs.
+4. Add the `merged-to-dev` auto-labeller as a post-merge action on `dev`.
+5. Update `dev` branch-protection to require: lint, typecheck, regression
+   (Linux + Windows), schema persistence, secret scan.
+
+**Phase 2 — release-quality gates**:
+
+6. Convert `preflight-eval.yml` from advisory to blocking on `main`-bound PRs
+   only (use `if: github.base_ref == 'main'`).
+7. New `release-gates.yml` running only on `main`-bound PRs: CHANGELOG diff,
+   version monotonicity, MCP live smoke.
+8. Add `bandit` + `pip-audit` to `release-gates`.
+9. Performance baseline harness — capture drift detection p50 and search
+   latency; compare against `main`'s last successful run.
+10. Update `main` branch-protection to require all Tier 1 + Tier 2 checks.
+
+**Phase 3 — nice to have**:
+
+11. Auto-remove the `merged-to-dev` label from issues when `dev` → `main`
+    forward-merges (the issues themselves already closed at the dev-merge,
+    per §2.2).
+12. Sticky PR-comment bot for preflight-eval results (covered by issue #49).
+
+Until Phase 1 ships, the documented Tier 1 list is **aspirational** — only
+`test-mcp-regression`, `test-schema-persistence`, and `preflight-eval`
+(advisory) actually run today. Reviewers should treat the rest as their own
+responsibility (run lint locally, verify on Windows, etc.) until the gates
+land.
+
+Red CI blocks merge. Don't ask reviewers to look at red PRs.
+
+### 4.6 Review feedback discipline
+
+CodeRabbit, Devin, and human reviewers all leave comments. The author's job:
+
+- **Address** every actionable comment with a commit or a reply justifying
+  decline.
+- **Resolve** the conversation thread only after addressing.
+- **Never** push `--force` on a PR with active review threads — comments lose
+  their line anchors. Use `--force-with-lease` only after a `git fetch`, and
+  call it out in a PR comment so reviewers re-fetch.
+
+---
+
+## 5. Merging to `dev`
+
+### 5.1 Strategy
+
+**Squash merging is disabled at the repo level** (`allow_squash_merge: false`)
+so the wrong choice is unavailable, not just discouraged. The reason this
+matters at all — beyond style preference — is that squash collapses
+multi-commit PRs into opaque blobs that cannot be cleanly cherry-picked into
+the §10.5 triage lane. See §10.5.0 "Why this lane exists" for the full
+rationale. Two options remain:
+
+| Merge style | When to use | Rationale |
+|---|---|---|
+| **Rebase and merge** *(default — covers ~all PRs)* | Single-commit PRs; multi-commit features; any PR a maintainer might backport to `triage-from-dev`; any PR with a `Triage-Cc:` trailer (see §10.5); Dependabot bumps | Preserves atomic commits as individually-cherry-pickable SHAs on `dev`. For single-commit PRs, this is the literal squash equivalent (one commit on `dev`) without the opaque-blob failure mode. GitHub's docs explicitly warn that squashing long-running branches "makes merge conflicts more likely … you'll have to resolve the same conflicts repeatedly." |
+| **Merge commit (`--no-ff`)** | Multi-commit features whose grouping matters historically (e.g. coordinated multi-handler refactor); any PR you may want to revert atomically with `git revert -m 1` | Preserves both individual commits *and* the merge boundary. Use sparingly — `dev` log gets noisy fast. |
+
+**Author obligation, not just merger obligation.** If you write a PR that may be
+triage-eligible, write atomic commits — one logical change per commit, each
+individually buildable, each with a meaningful subject line. The Linux kernel's
+atomic-commit discipline ([Linus on commit messages](https://yarchive.net/comp/linux/commit_messages.html))
+exists precisely so cherry-pick is mechanical, not interpretive. Reviewers may
+ask you to reorganize. Throwaway commits with messages like `wip`, `fix typo`,
+or `address review` should be squashed locally with `git rebase -i` *before*
+the PR is merged — since repo-level squash is off, the rebase-and-merge button
+will otherwise preserve them verbatim.
+
+### 5.2 Pre-merge checklist (for the merger)
+
+- [ ] CI green
+- [ ] All review threads resolved
+- [ ] Milestone attached on the PR (== same milestone as the issue)
+- [ ] Plan / Audit / Seal references exist for non-trivial PRs
+- [ ] CHANGELOG `## Unreleased` updated (or PR explicitly states "no user-visible change")
+
+### 5.3 Post-merge
+
+- Issue auto-closes (via `Closes #X`).
+- Milestone progress bar advances.
+- Branch may be deleted (GitHub default).
+- If the work shipped a new tool / new tool field / changed default, the matching
+  `pilot/mcp/skills/<tool>/SKILL.md` **must** be in the same PR — for
+  rebase-and-merge, in the same atomic commit; for merge-commit, in one of the
+  commits being merged. Project rule from `CLAUDE.md`. Reviewers reject
+  silently-mismatched skill contracts.
+
+---
+
+## 6. Release cycle
+
+### 6.1 Cadence
+
+- **Minor releases** (`v0.X.0`): roughly every 2–3 weeks, when the milestone is
+  full and `dev` is stable.
+- **Patch releases** (`v0.X.Y`): as needed for bug fixes that can't wait.
+- **Major release** (`v1.0.0`): scheduled; not driven by milestone fill.
+
+Jin owns the call on "is `dev` ready to ship". Heuristic: milestone closed-issue
+count covers the headline features, and CI on `dev` HEAD has been green for ≥ 24 h.
+
+### 6.2 Version selection
+
+Semver applies:
+
+- **PATCH** — bug fix only, no public-API change, no schema migration.
+- **MINOR** — new tool / new tool field / new schema migration that is **additive**
+  with a registered `_migrate_vN_to_vN+1` and bumped `SCHEMA_COMPATIBILITY` map.
+- **MAJOR** — breaking change to a tool's request/response shape, or a destructive
+  schema migration, or a CLI flag rename.
+
+If the change is borderline, round **up**. Schema-migrating PRs are never PATCH.
+
+### 6.3 The release PR (`dev` → `main`)
+
+Jin opens this PR. It targets `main`, base = `main`, head = `dev`.
+
+**Title**: `release: v0.13.0`
+
+**Body**:
+
+```markdown
+## Release v0.13.0
+
+### Headline
+One sentence the README and Twitter post can both quote.
+
+### Included issues
+Closes milestone v0.13.0
+- #61 — CodeGenome Phase 4 (semantic drift evaluation)
+- #75 — <…>
+- …
+
+### Schema
+- Migrates ledger v13 → v14 (additive: CHANGEFEED on compliance_check,
+  semantic_status, evidence_refs)
+
+### Breaking changes
+None. (or: list each.)
+
+### Documentation
+- CHANGELOG.md — v0.13.0 section
+- skills/bicameral-sync/SKILL.md — Phase 3+4 callout updated
+- README.md — bumped feature list (if applicable)
+- New: docs/DEV_CYCLE.md
+```
+
+### 6.4 Pre-release checklist
+
+Jin runs through this before merging the release PR. Items marked **CI** are
+enforced by the Tier 2 gates in §4.5.2 once Phase 2 lands; until then they are
+manual.
+
+- [ ] **CHANGELOG flip** — move `## Unreleased` content under `## [v0.13.0] - 2026-04-29`.
+      Add a fresh empty `## Unreleased` block at the top. **(CI: CHANGELOG enforcement)**
+- [ ] **Version bump** — update `pyproject.toml` / `__init__.py` / wherever the
+      canonical version lives. **(CI: version monotonicity)**
+- [ ] **`SCHEMA_COMPATIBILITY` map** — confirm the new schema version maps to the
+      new release version (e.g. `14: "0.13.0"`). **(CI: schema migration validation)**
+- [ ] **Skill files** — every changed skill is committed in `pilot/mcp/skills/`,
+      not just in `.claude/skills/`.
+- [ ] **Help / training docs** (see §8) — published for any feature on the
+      "user-touching" list.
+- [ ] **Demo readiness** — at least one demo script (§12) covers each headline
+      feature.
+- [ ] **CI on `dev` HEAD** — green for ≥ 24 h. **(CI: full regression incl. slow markers)**
+- [ ] **Preflight eval** — blocking gate, no regression vs `main`'s baseline.
+      **(CI: preflight-eval blocking on `main`-bound)**
+- [ ] **Performance** — drift detection p50, ingest throughput, search latency
+      no more than 15 % worse than `main`'s last successful run. **(CI: performance regression)**
+- [ ] **Security scan** — `bandit` + `pip-audit` + GitHub Dependency Review
+      clean. **(CI: security scan)**
+- [ ] **MCP protocol live smoke** — server boots, every registered tool returns
+      a shape-conformant response over stdio. **(CI: MCP protocol live smoke)**
+- [ ] **Milestone** — every issue under it is closed.
+
+### 6.5 Merging the release PR
+
+**Strategy**: **merge-commit**, not squash. `main` is meant to preserve the
+release boundary in history; a merge commit ("`Merge dev into main for
+v0.13.0`") gives `git log main` a clean release-by-release walk.
+
+```bash
+git checkout main
+git pull
+git merge --no-ff dev -m "release: v0.13.0"
+git push
+```
+
+GitHub's UI "Create a merge commit" button does the same.
+
+### 6.6 Tagging
+
+Immediately after the merge:
+
+```bash
+git tag -a v0.13.0 -m "Release v0.13.0 — CodeGenome Phase 4 (semantic drift)"
+git push --tags
+```
+
+Tag format: `vMAJOR.MINOR.PATCH`. Annotated, never lightweight. The annotation
+body is the headline sentence from the release PR.
+
+### 6.7 GitHub Release
+
+Create a Release object on GitHub from the tag (`gh release create v0.13.0` or
+the UI):
+
+**Title**: `v0.13.0 — CodeGenome Phase 4 (semantic drift)`
+
+**Body**: copy/paste the CHANGELOG section for this version, then append:
+
+```markdown
+---
+
+## Documentation
+- [Migration notes](https://…/docs/migrations/v0.13.md) — schema v13 → v14
+- [User guide for semantic drift evaluation](https://…/docs/guides/semantic-drift.md)
+- [Demo: cosmetic-vs-semantic auto-resolve](https://…/docs/demos/04-drift-classifier.md)
+
+## Verification
+Merkle seal: 0ebcf69b…
+META_LEDGER entries: #11 (VETO), #12 (PASS), #13 (PASS post-rebase), #14 (seal)
+```
+
+**Attachments**: none for now (we ship via PyPI/source). When we ship binaries,
+attach platform builds here.
+
+### 6.8 Post-release
+
+- Close the milestone.
+- Open the next milestone (`v0.14.0`).
+- Announce: README badge bump, project README "Latest" line, optional Slack /
+  Discord drop. Use the headline sentence verbatim.
+
+---
+
+## 7. CHANGELOG.md conventions
+
+We follow [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) loosely.
+
+**Top of file at all times**:
+
+```markdown
+## [Unreleased]
+
+### Added
+- (work in flight that's already merged to dev)
+
+### Changed
+### Deprecated
+### Removed
+### Fixed
+### Schema
+### Security
+```
+
+When Jin cuts a release, he replaces `[Unreleased]` with the version + date,
+then prepends a fresh empty `[Unreleased]` block.
+
+**Section ordering** (preserve even when empty — drop a section only at release
+flip): `Added`, `Changed`, `Deprecated`, `Removed`, `Fixed`, `Schema`,
+`Security`.
+
+**One bullet per logical change**, not per file. User-facing language. Internal
+governance details (chain hashes, verdicts) stay out of CHANGELOG; they live in
+META_LEDGER.
+
+---
+
+## 8. Documentation requirements per release
+
+Some features ship with code only. Some ship with code **plus** mandatory docs.
+Use this matrix:
+
+| Feature class | User-touching? | Docs required |
+|---|---|---|
+| New MCP tool | yes | `pilot/mcp/skills/<tool>/SKILL.md` + entry in `README.md#tools` |
+| New tool field / new status value | yes | Update every skill that renders the field |
+| New schema migration | indirect | `docs/migrations/vN.md` — what changes, automatic or manual |
+| New caller-facing helper (e.g. `ensure_ledger_synced`) | yes | `docs/guides/<feature>.md` user guide |
+| New deterministic primitive (e.g. continuity matcher) | yes | demo script in `docs/demos/` |
+| Bug fix without behavior change | no | CHANGELOG entry only |
+| Internal refactor | no | CHANGELOG entry only ("Changed: …") |
+| Performance improvement | no, unless > 2× | CHANGELOG entry; `> 2×` adds a `docs/perf/` note |
+| Security fix | yes | CHANGELOG `### Security` entry + `SECURITY.md` advisory if disclosed |
+
+**Help docs go in**: `docs/guides/<feature>.md`. Structure:
+
+```markdown
+# <Feature> — User Guide
+
+## What it does
+One paragraph.
+
+## When you'd use it
+Bulleted scenarios.
+
+## Quickstart
+Smallest end-to-end example.
+
+## Reference
+Tool name, request shape, response shape, error modes.
+
+## See also
+Links to related guides + demo script.
+```
+
+**Training docs** (longer-form, multi-step walkthroughs intended to teach a
+concept, not just document a tool) go in `docs/training/<topic>.md`. These are
+optional unless the feature introduces a concept the user must internalize
+(example: "what does `pending` vs `reflected` mean?" — that's training, not
+reference).
+
+---
+
+## 9. Skill file rule (project-specific, mandatory)
+
+From `CLAUDE.md`:
+
+> Any change to an MCP tool's behavior — new fields in a response, new status
+> values, changed defaults, new tool calls, deprecated params — **must ship
+> with a matching update to the relevant `pilot/mcp/skills/*/SKILL.md`** in the
+> same commit.
+
+This is enforced at review time. `pilot/mcp/skills/` is canonical;
+`.claude/skills/bicameral-*/SKILL.md` copies are stale and slated for deletion.
+
+---
+
+## 10. Hotfix path (main → main → dev)
+
+When `main` has a bug that can't wait for the next release:
+
+```
+                                    ┌──── tag v0.13.1 ────┐
+main ─────●─────────────────────────●─────────────────────●─────▶
+           \                       /                       \
+            └── hotfix/0.13.1 ────┘                         │
+                                                            │ merge or
+                                                            │ cherry-pick
+                                                            ▼
+dev  ─────────────────────────────────────────────────────●─────▶
+```
+
+1. Branch from `main` (not `dev`): `hotfix/0.13.1-<slug>`.
+2. Smallest possible diff. No tangential cleanup.
+3. PR targets `main`. Reviewer approves; CI green.
+4. Merge to `main`, tag `v0.13.1`, GitHub Release.
+5. **Immediately** sync to `dev`: either merge `main` into `dev` or cherry-pick
+   the hotfix commit. Resolve conflicts. Push. Don't let `dev` and `main`
+   diverge in opposite directions for more than an hour.
+
+Hotfixes never carry feature work — feature work goes through the normal
+feature → dev → release cycle.
+
+### 10.5 Triage lane (`dev` → `triage-from-dev` → `main`)
+
+`triage-from-dev` is a long-lived **curated stable lane** that ships a *subset*
+of `dev` to `main` between full releases. It exists for changes that should
+reach users faster than the next minor release allows, but that aren't
+emergency hotfixes (which use §10's path).
+
+#### 10.5.0 Why this lane exists
+
+The triage lane plus the §5.1 rebase-and-merge default (with squash disabled
+at the repo level) together **allow for parallel development of feature work
+on `dev` and selective incorporation into production based on live feedback**.
+
+That goal decomposes into three constraints the existing two-branch flow
+(feature → dev → main) cannot satisfy on its own:
+
+- **Fast iteration on `dev` shouldn't gate user-visible delivery on `main`.**
+  Without a triage lane, every minor-release cycle is "ship the whole
+  integrated batch or wait." A bug fix that's ready in week one of a six-week
+  release cycle waits five weeks for a milestone full of unrelated work to
+  close. The triage lane lets ready-and-eligible work reach users on its own
+  cadence.
+- **Live feedback should steer what reaches `main`, not just what reaches
+  `dev`.** When telemetry / a customer report / a security finding marks a
+  specific change as important, the maintainer needs to be able to ship that
+  change *without* shipping everything ahead of it on `dev`. Cherry-picking a
+  selected subset (under §10.5.1's eligibility rule) is that mechanism.
+- **The merge style on `dev` must preserve cherry-pickability.** Squash
+  collapses a multi-commit PR into one opaque blob — fine for `dev`'s log,
+  fatal for backport. Rebase-and-merge keeps each commit as an individually
+  addressable SHA, which is the unit the §10.5.3 cherry-pick mechanic operates
+  on. §5.1's "squash disabled at the repo level" exists to make this
+  guarantee structural rather than aspirational.
+
+Together these rules let the project hold two timelines: a fast-iteration
+trunk where features can land in pieces and the team can change its mind, and
+a slower curated trunk where users see only what's been deemed ready for
+broad delivery. Neither trunk forces the other's cadence.
+
+```
+dev ────●────●────●────●────●────●─────▶
+            \         \    \
+             cherry-pick -x  (selected commits only)
+              \         \    \
+               ▼         ▼    ▼
+triage-from-dev ●────────●────●─────▶ ──── release PR ────▶ main
+```
+
+**Direction is one-way.** Cherry-picks flow `dev → triage-from-dev` only. Never
+develop on `triage-from-dev` directly; never cherry-pick `triage-from-dev →
+dev`. (Bugs introduced *only* on the triage lane get fixed on `dev` first, then
+re-cherry-picked.)
+
+#### 10.5.1 Eligibility — what gets triaged
+
+Modeled after the Linux kernel's `stable` tree rules
+([kernel.org stable rules](https://docs.kernel.org/process/stable-kernel-rules.html)).
+A commit is triage-eligible if **all** of:
+
+- It is small and self-contained (rough guideline: ≤ 100 lines of context-diff,
+  one logical change).
+- It is **obviously correct and tested** — the kernel's exact phrasing.
+- It fixes one of: a real user-facing bug, a security regression, a build break
+  on a supported platform, a data-loss/corruption bug, or a documented
+  cross-platform quirk. Or it is a small additive feature whose risk surface is
+  isolated (e.g. a new optional MCP tool field with a default).
+- It does not depend on `dev`-only refactors that haven't shipped to `main`. If
+  it does, the prerequisites must be triage-eligible too, and they all
+  cherry-pick as a coherent batch.
+
+**Not triage-eligible** by default: schema-migrating changes, breaking
+public-API changes, multi-PR feature epics, and "v1 patches" (the catch-all
+term that `triage-from-dev` PR titles use for work explicitly held for the
+next major).
+
+When in doubt, the change waits for the next `dev → main` release.
+
+#### 10.5.2 Author trailer — `Triage-Cc:`
+
+If you (the author) believe a commit belongs on the triage lane, add a trailer:
+
+```
+Triage-Cc: triage-from-dev
+```
+
+For commits that fix an earlier commit (kernel-style), also add:
+
+```
+Fixes: <abbrev-sha> ("<subject of fixed commit>")
+```
+
+The release manager finds candidates with:
+
+```bash
+git log --grep='^Triage-Cc:' origin/dev ^origin/triage-from-dev
+```
+
+Trailers are advisory — the release manager makes the final call — but they
+make the candidate set legible without re-reading every commit message.
+
+#### 10.5.3 Cherry-pick mechanics
+
+Always use `cherry-pick -x` so the resulting commit message records its
+provenance (`(cherry picked from commit <dev-sha>)`):
+
+```bash
+git checkout triage-from-dev
+git fetch origin
+git cherry-pick -x <dev-sha>
+# resolve conflicts narrowly — do NOT pull in unrelated dev refactors
+git push origin triage-from-dev
+```
+
+When a cherry-pick conflicts, classify the conflict before resolving:
+
+- **Missing-prerequisite conflict** — the dev commit calls a function /
+  references a schema field / depends on a contract that does not exist on
+  `triage-from-dev` and is not introduced by this same commit. **Stop.** Either
+  pick the prerequisite first (if it is itself triage-eligible per §10.5.1) or
+  hold the change for the next full `dev → main` release.
+- **Diverged-surface conflict** — the change's *target file* has been
+  refactored on dev's path between triage's branch point and the cherry-pick
+  source, but every symbol / schema field / contract the cherry-picked commit
+  *actually depends on* either already exists on triage or is additively
+  introduced in this same commit. **Adaptable** — see below.
+
+##### Adaptation clause
+
+A diverged-surface conflict may be resolved by manually adapting the conflict
+hunks to triage's surrounding code, provided **all** of the following hold:
+
+1. The cherry-pick's *intent* (the conceptual change — e.g. "route through
+   new adapter method", "add replay case for new event type") is preserved.
+   The semantic effect on triage matches the semantic effect on dev from any
+   external caller's POV.
+2. No new logic is *invented* — every line in the resolution either comes
+   from the cherry-picked commit, exists on triage already, or is the
+   minimal mechanical glue to bridge the two (e.g. renaming a local variable
+   to match triage's existing identifier).
+3. Each adapted hunk is annotated:
+   - In the **commit message** under an `Adaptation:` trailer:
+     `Adaptation: handlers/ratify.py — rewrote against pre-#65 inline impl`
+   - In the **code itself**, where the adapted block isn't trivially obvious,
+     with `# triage-adapt: <one-line reason>` immediately above the block.
+
+If you find yourself writing a hunk that doesn't satisfy (2) — i.e. you're
+inventing logic to bridge the gap — the conflict is in fact a missing-
+prerequisite conflict in disguise. Stop and reclassify.
+
+The release manager reviews adapted commits with extra scrutiny at the
+§10.5.4 release PR; adapted commits should be a small fraction of any
+triage release, and a triage cycle that's mostly adaptations is a signal
+that the lane has drifted too far from `dev`.
+
+Resolving conflicts by inventing replacement code that does not satisfy the
+adaptation clause above is forbidden — the cherry-pick must remain a faithful
+subset of `dev`, modulo legitimate adaptation to a diverged surface.
+
+The fact that `triage-from-dev` already carries some commits with **different
+SHAs than dev** (e.g. v0.14.0 telemetry, RFC #98) is sunk cost from the lane's
+pre-§10.5 era. Going forward every cherry-pick uses `-x` and the audit trail
+re-converges. Do **not** rewrite history on `triage-from-dev` to fix the
+divergence — it is a published branch.
+
+#### 10.5.4 Release PR (`triage-from-dev` → `main`)
+
+The triage release PR follows §6 with the following specifics:
+
+- **Title**: `release: v0.X.Y (triage)` — the patch version bumps; minor stays
+  pinned to whatever `main` last tagged from a full `dev → main` release.
+- **Flow label**: `flow:release` (same as a full release).
+- **Body** lists each cherry-picked commit with its source `dev-sha` and the
+  issue/PR it traces back to.
+
+After the triage release is tagged on `main`, sync `main` back to `dev` per §10
+(merge or cherry-pick — the next-release CHANGELOG flip absorbs the patch).
+
+---
+
+## 11. Roles
+
+| Role | Owner | Responsibilities |
+|---|---|---|
+| **Contributor** | anyone | Open issues, branch off `dev`, open PRs to `dev`, address review feedback, keep skill files in sync. |
+| **Reviewer** | code-owners | Block on red CI, Razor violations, missing skill updates, missing Plan/Audit/Seal references on non-trivial PRs. |
+| **Release manager** | Jin | Decide release cadence, open release PR, run pre-release checklist, merge to `main`, tag, publish GitHub Release, manage milestones. |
+| **Doc steward** | rotating | Verify the §8 matrix is satisfied before each release. |
+| **Governance steward** | QOR-chain owner | Verify META_LEDGER chain integrity at each release seal. |
+
+Single-maintainer fallback: if Jin is offline, the release waits. We do not
+unilaterally promote `dev` → `main`.
+
+---
+
+## 12. Demo scripts
+
+Every shipped feature should have at least one runnable demo that takes a
+viewer from "I don't know what this does" to "I see the value" in under 5
+minutes. Demos live in `docs/demos/<NN>-<slug>.md` and follow the same template:
+
+```markdown
+# Demo NN: <Title>
+
+**Audience**: <e.g. "first-time evaluator">
+**Time**: <≤ 5 min>
+**Prereqs**: <repo cloned, deps installed, MCP server running>
+
+## What you'll see
+1-paragraph spoiler.
+
+## Setup
+Copy-pasteable shell block.
+
+## Walkthrough
+Numbered steps, each with the exact tool call / command and the expected
+output (truncated where it makes sense).
+
+## What just happened
+Plain-English read of the result. Tie it back to the user-value claim.
+
+## Next
+Pointer to the user guide and related demos.
+```
+
+Below: four demo scripts that cover the project's headline functionality. Each
+one should be authored as a standalone file and kept in sync with the matching
+skill / tool.
+
+### Demo 01 — First decision bind, search, drift detect
+
+**Path**: `docs/demos/01-first-bind.md`
+**Audience**: "I just installed bicameral-mcp; what's the loop?"
+
+**Storyline**:
+
+1. `bicameral.bind` a decision: *"all monetary calculations use `Decimal`,
+   never `float`"*. Show that the tool returns a region-id and a content hash.
+2. `bicameral.search_decisions` for the keyword `"monetary"`. Show that the
+   just-bound decision is returned at the top.
+3. Edit the bound region: change `Decimal` to `float` in the linked file.
+4. `bicameral.detect_drift`. Show that the region surfaces with status
+   `drifted`.
+5. Restore the file. Re-run. Status flips back to `reflected`.
+
+**Value claim**: "Your decisions are now first-class artifacts — searchable,
+hash-anchored, and drift-detected without you running anything by hand."
+
+### Demo 02 — Commit-sync loop (post-commit hook → resolve_compliance)
+
+**Path**: `docs/demos/02-commit-sync.md`
+**Audience**: "How does this play with my actual git workflow?"
+
+**Storyline**:
+
+1. Show the post-commit hook installed (`.git/hooks/post-commit`) calling
+   `bicameral-mcp link_commit HEAD`.
+2. Edit a bound region. `git commit`.
+3. Show the hook output: `bicameral: new commit detected`.
+4. Show `_pending_compliance_checks` injected into the next tool response.
+5. Walk through the `bicameral-sync` skill: read region → reason → batched
+   `resolve_compliance(verdicts=[...])`.
+6. Show the final ledger state: N reflected, M drifted, 0 pending.
+
+**Value claim**: "Compliance is computed automatically on every commit, not
+quarterly by a human auditor."
+
+### Demo 03 — Continuity matcher: function rename auto-redirect (Phase 3)
+
+**Path**: `docs/demos/03-continuity-rename.md`
+**Audience**: "What happens when I refactor?"
+
+**Storyline**:
+
+1. Bind a decision to a function `calculate_tax_v1`.
+2. Rename the function to `compute_tax`. Move it to a different file. Commit.
+3. Naïvely, the binding would be orphaned and the decision would go `ungrounded`.
+4. With `BICAMERAL_CODEGENOME_ENHANCE_DRIFT=1`: `link_commit` runs the
+   continuity matcher pre-pass.
+5. Show the response's `continuity_resolutions` list:
+   `semantic_status: identity_renamed`, the binding redirected, no manual
+   action needed.
+
+**Value claim**: "Refactoring no longer breaks your decision graph. The matcher
+recognises moved or renamed code and updates bindings automatically."
+
+### Demo 04 — Cosmetic-vs-semantic drift classifier (Phase 4)
+
+**Path**: `docs/demos/04-drift-classifier.md`
+**Audience**: "Why does this not flag every whitespace change as drift?"
+
+**Storyline**:
+
+1. Bind a decision to a function. Capture the baseline ledger state.
+2. **Cosmetic change**: re-format the docstring; re-order imports. Commit.
+   Run `link_commit`. Show `auto_resolved_count: 1`, status flips to
+   `compliant` with `semantic_status: semantically_preserved`. Zero LLM calls.
+3. **Semantic change**: change the threshold inside the function from 100
+   to 50. Commit. Run `link_commit`. Show the region appears in
+   `pending_compliance_checks` with a `pre_classification` hint
+   (`verdict: uncertain`, signals breakdown).
+4. Walk through the LLM-side reasoning the `bicameral-sync` skill applies to
+   issue the `drifted` verdict.
+5. Show the M3 benchmark: 30 cases × 7 languages, 0% false-positive rate on
+   the cosmetic-only set.
+
+**Value claim**: "The classifier handles the easy 80% deterministically, leaves
+only genuinely ambiguous cases for the LLM, and never costs you a token on a
+docstring tweak."
+
+### Authoring rules for new demos
+
+- Run the demo end-to-end on a fresh clone before committing it. Demos that
+  drift become anti-marketing.
+- If the demo depends on a feature flag (`BICAMERAL_CODEGENOME_ENHANCE_DRIFT`,
+  etc.), say so in **Prereqs**.
+- If the demo records output, store the recording in `docs/demos/recordings/`
+  next to the script. Keep recordings under 30 MB.
+- Update the demo whenever the underlying tool's response shape changes —
+  this is enforced under §9 (skill rule).
+
+---
+
+## 13. When in doubt
+
+- **"Does this need a release PR?"** — If `main`'s SHA would change, yes.
+- **"Should I close this issue?"** — `Closes #X` in the PR body, then yes
+  (auto on dev-merge).
+- **"Should I bump the version?"** — Only Jin bumps the version, only at
+  release time.
+- **"Can I commit a skill change separately from the tool change?"** — No.
+  Same commit, same PR.
+- **"Should I write a guide for this?"** — Use the §8 matrix. If the row says
+  "yes", yes.
+- **"Is this a hotfix or a feature?"** — Hotfix is for a regression on `main`
+  that broke a user. Everything else is a feature.
+
+---
+
+**Owner**: Jin (release manager) + repo maintainers.
+**Last reviewed**: 2026-04-29.
+**Change protocol**: amendments require a META_LEDGER entry + a PR labeled
+`docs:dev-cycle`.
diff --git a/docs/demos/README.md b/docs/demos/README.md
new file mode 100644
index 00000000..eab196b0
--- /dev/null
+++ b/docs/demos/README.md
@@ -0,0 +1,53 @@
+# Demos
+
+Runnable, ≤ 5-minute walkthroughs of headline functionality. Each demo takes a
+viewer from "I don't know what this does" to "I see the value" without leaving
+the file.
+
+See [`docs/DEV_CYCLE.md` §12](../DEV_CYCLE.md#12-demo-scripts) for the
+authoring rules and the demo template.
+
+## Index
+
+| # | Title | Audience | Status |
+|---|---|---|---|
+| 01 | First decision bind, search, drift detect | "what's the loop?" | planned |
+| 02 | Commit-sync hook → resolve_compliance | "how does it play with git?" | planned |
+| 03 | Continuity matcher: function rename auto-redirect (Phase 3) | "what about refactors?" | planned |
+| 04 | Cosmetic-vs-semantic drift classifier (Phase 4) | "why no whitespace false-flags?" | planned |
+| —  | [v0 user flow e2e (split-screen)](./v0-userflow-e2e.md) | "what does the loop look like end-to-end?" | live (manual workflow) |
+
+## Authoring rules (summary)
+
+- Run the demo end-to-end on a fresh clone before committing it.
+- If the demo depends on a feature flag (e.g.
+  `BICAMERAL_CODEGENOME_ENHANCE_DRIFT`), say so in **Prereqs**.
+- Recordings (≤ 30 MB) live in `recordings/` next to the script.
+- Update the demo whenever the underlying tool's response shape changes —
+  enforced by the skill rule in `DEV_CYCLE.md` §9.
+
+## Template
+
+```markdown
+# Demo NN: <Title>
+
+**Audience**: <e.g. "first-time evaluator">
+**Time**: <≤ 5 min>
+**Prereqs**: <repo cloned, deps installed, MCP server running>
+
+## What you'll see
+1-paragraph spoiler.
+
+## Setup
+Copy-pasteable shell block.
+
+## Walkthrough
+Numbered steps, each with the exact tool call / command and the expected
+output (truncated where it makes sense).
+
+## What just happened
+Plain-English read of the result. Tie it back to the user-value claim.
+
+## Next
+Pointer to the user guide and related demos.
+```
diff --git a/docs/demos/v0-userflow-e2e.md b/docs/demos/v0-userflow-e2e.md
new file mode 100644
index 00000000..cf951470
--- /dev/null
+++ b/docs/demos/v0-userflow-e2e.md
@@ -0,0 +1,101 @@
+# Demo: v0 user flow e2e (split-screen, two views)
+
+**Audience**: first-time evaluators who want to see the loop without running it.
+**Time**: ~6 min PM view, ~10 min Dev view.
+**Prereqs**: none — videos play in any browser.
+
+## What you'll see
+
+A continuous Claude Code CLI session — recorded once, then split in post
+into two persona-shaped videos:
+
+- **Left pane** of the recording — `xterm` running `claude -p <composite-prompt>`
+  with `bicameral-mcp` registered as the only MCP server. The LLM's reasoning,
+  tool calls, and outputs render in real time via a small stream-json formatter.
+- **Right pane** — `chromium` pointed at the bicameral dashboard sidecar
+  (`http://localhost:<port>`). Live SSE updates as the session emits ledger
+  writes. **Because both PM scenes and the Dev scene share one MCP process,
+  the dashboard state in the post-implementation chapter literally reflects
+  the commits the dev made on screen** — not a re-hydration from a separate
+  ledger.
+
+### `pm.mp4` (PM view)
+
+| Chapter | Tools used | What's on screen |
+|---|---|---|
+| 1. Post-meeting | `bicameral.dashboard`, `bicameral.ingest`, `bicameral.ratify` | PM ingests three GitHub Desktop roadmap decisions; the dashboard fills with proposed-then-ratified entries. |
+| _Transition slide_ | _(ffmpeg-generated)_ | "Dev now implements the change → Returning to PM after the implementation has landed." |
+| 2. Post-implementation | `bicameral.history`, `bicameral.ratify` | PM calls `history`; the cherry-pick decision now shows `status=reflected` (was pending). PM ratifies the post-implementation state. |
+
+### `dev.mp4` (Dev view)
+
+| Step | Tool | What's on screen |
+|---|---|---|
+| 1 | `bicameral.preflight` | Surfaces the cherry-pick decision before any edit. |
+| 2 | `Edit` | Single-line annotation added to `app/src/lib/git/cherry-pick.ts`. |
+| 3 | `Bash` (`git add` + `git commit`) | Real commit on the desktop/desktop fixture. |
+| 4 | `bicameral.link_commit` | Detects drift candidates against decisions bound to that file. |
+| 5 | `bicameral.resolve_compliance` | Verdict per pending compliance check (compliant / drifted / not_relevant). |
+| 6 | `bicameral.ingest` (source=agent_session) | Captures any session-end corrections. |
+
+A third file, `full.mp4`, contains the full unbroken arc — useful if you
+want to see the Dev's commits land in the dashboard without the
+transition cut.
+
+## How to access the latest demos
+
+The MP4s are generated on demand and **not committed to git** — they live in
+the `v0-user-flow-e2e-demos` artifact attached to the manual workflow run.
+
+1. Open the [v0 user flow e2e workflow runs](../../../../actions/workflows/v0-user-flow-e2e.yml).
+2. Filter to runs triggered via "Run workflow" with `record_demo = true`.
+3. Scroll to the run's **Artifacts** section, download `v0-user-flow-e2e-demos`.
+4. Unzip → `pm.mp4`, `dev.mp4`, `full.mp4`.
+
+Artifact retention is 90 days. On a release cut (per
+[`docs/DEV_CYCLE.md` §6.7](../DEV_CYCLE.md#67-github-release)), the maintainer
+attaches the latest demos to the GitHub release for permanent URLs.
+
+## How to record a fresh set
+
+Demos are intentionally manual — not gated on every PR — because each run costs
+~25–35 minutes of wall-clock time plus Claude API spend.
+
+1. Trigger via the workflow's **Run workflow** dropdown (UI), or:
+   ```bash
+   gh workflow run v0-user-flow-e2e.yml -f record_demo=true
+   ```
+2. Wait for the run to finish. The assertion step still runs first and is
+   the authority on pass/fail; the recording step is `continue-on-error`,
+   so a flake never blocks merge.
+3. Download the `v0-user-flow-e2e-demos` artifact as above.
+
+## How the split works
+
+`tests/e2e/record_demo.sh` runs one continuous claude session driven by
+`tests/e2e/prompts/composite-demo.md` (three scenes: PM-pre, Dev, PM-post).
+The session's stream-json output is piped through
+`tests/e2e/demo_renderer.py`, which:
+
+1. Pretty-prints to stdout so the xterm shows readable text.
+2. Watches the tool-call timeline and writes wall-clock timestamps to
+   `composite-demo-scenes.txt` at two boundaries:
+   - **Scene 1 → 2** = first `bicameral.preflight` call (Dev starts).
+   - **Scene 2 → 3** = first `bicameral.history` call after any
+     `bicameral.link_commit` (PM resumes).
+3. Persists the raw stream-json transcript for forensic review.
+
+After ffmpeg stops, the script trims `full.mp4` at those two timestamps
+into `pm-pre`, `dev`, `pm-post`, generates a 4-second transition slide via
+`drawtext`, and concatenates `pm-pre + transition + pm-post → pm.mp4`.
+
+If scene markers are missing (e.g., the LLM declined a step), the script
+falls back to keeping `full.mp4` only — the recording is preserved but
+the split is skipped.
+
+## Next
+
+- [End-to-end suite README](../../tests/e2e/README.md) — the assertion-only
+  path that runs on every qualifying PR.
+- [`#108` spec](https://github.com/BicameralAI/bicameral/issues/108) — the
+  six canonical flows the composite prompt orchestrates.
diff --git a/docs/guides/README.md b/docs/guides/README.md
new file mode 100644
index 00000000..1b16b769
--- /dev/null
+++ b/docs/guides/README.md
@@ -0,0 +1,45 @@
+# User Guides
+
+Reference-style documentation for individual features. Pairs with the demos in
+`docs/demos/` (which show *how it feels*) by answering *what it does, when to
+use it, and what every field means*.
+
+See [`docs/DEV_CYCLE.md` §8](../DEV_CYCLE.md#8-documentation-requirements-per-release)
+for when a guide is required by the release process.
+
+## Index
+
+| Topic | Surface | Status |
+|---|---|---|
+| (none yet) | — | — |
+
+## Template
+
+```markdown
+# <Feature> — User Guide
+
+## What it does
+One paragraph.
+
+## When you'd use it
+Bulleted scenarios.
+
+## Quickstart
+Smallest end-to-end example.
+
+## Reference
+Tool name, request shape, response shape, error modes.
+
+## See also
+Links to related guides + demo script.
+```
+
+## Authoring rules
+
+- One guide per feature, named `<feature-slug>.md`.
+- Guides are reference, not tutorial — show field shapes and error modes
+  exhaustively. Tutorial-style content belongs in `docs/training/`.
+- A guide referenced by a release PR's documentation checklist must exist by
+  the time the release PR opens, not later.
+- When a tool's response shape changes, update the matching guide in the same
+  commit (per `DEV_CYCLE.md` §9 skill rule).
diff --git a/docs/preflight-failure-scenarios.md b/docs/preflight-failure-scenarios.md
index 7a30bb3e..1543e9cc 100644
--- a/docs/preflight-failure-scenarios.md
+++ b/docs/preflight-failure-scenarios.md
@@ -54,7 +54,7 @@ Status legend:
 | **M3** | skill | Internal acronym / jargon | Decision: *"Audit log captures every admin action..."* / Topic: `SOC2 compliance trail` | ⚪ |
 | **M4** | skill | Ungrounded decision (no `binds_to`) — only surfaces if skill judges its feature group relevant from history | Decision (status=ungrounded): *"Permission checks always run server-side"* / Topic: `permission middleware client check` | ⚪ |
 | **M5** | handler | Region-anchored miss — caller didn't pass `file_paths` | Topic: `update auth config` / `file_paths=[]` — handler returns no region matches; only HITL/guided can fire | ⚪ acknowledged caller responsibility; HITL still global |
-| **M6** | handler | Transitive — decision pinned to a dependency of `file_paths` | Decision pinned to `auth/jwt.py` / `file_paths=["auth/login_handler.py"]` (imports `jwt`) | ❌ region lookup only sees the direct file |
+| **M6** | handler | Transitive — decision pinned to a dependency of `file_paths` | Decision pinned to `auth/jwt.py` / `file_paths=["auth/login_handler.py"]` (imports `jwt`) | ✅ closed by #173/#174 — `_region_anchored_preflight` expands `file_paths` by 1 hop along import edges before the `binds_to` lookup; expansion-only matches surface with `confidence=0.7` and `sources_chained` adds `"graph"` |
 | **M7** | handler | Dedup-key coarseness — current key is `(topic)`; same topic with changed `file_paths`, new HITL state, or a fresh ledger revision is silenced | (a) Topic re-asked after a relevant decision lands; (b) topic kept stable while `file_paths` shifts to a different region; (c) HITL condition resolves mid-window | ❌ open — broaden cache key to `(topic, normalized_file_paths, ledger_revision)` and invalidate on HITL change |
 | **M8** | meta | Skill skips `bicameral.history()` despite non-empty ledger (skill-step adherence drift) | Caller LLM jumps straight to `bicameral.preflight` and never reads history | ⛔ skill-conformance, not handler-eval scope |
 | **M9** | meta | `BICAMERAL_PREFLIGHT_MUTE` set, developer forgot it's on | Env var carried over from prior debug session | ⛔ intentional kill switch |
diff --git a/docs/training/README.md b/docs/training/README.md
new file mode 100644
index 00000000..a0896f97
--- /dev/null
+++ b/docs/training/README.md
@@ -0,0 +1,61 @@
+# Training
+
+Long-form, multi-step walkthroughs that teach a *concept*, not a tool. Use
+training docs when a feature introduces an idea the user must internalise
+before the reference docs make sense.
+
+Examples of concepts that warrant training:
+
+- *"What does `pending` vs `reflected` vs `drifted` vs `ungrounded` actually
+  mean, and how does the ledger derive each?"*
+- *"What's a content-hash CAS guard, why does the server reject your verdict
+  when it doesn't match, and how do you recover?"*
+- *"How does the continuity matcher decide a renamed function is the same
+  identity?"*
+
+If the answer fits in a guide's intro paragraph, it's a guide, not a training
+doc.
+
+See [`docs/DEV_CYCLE.md` §8](../DEV_CYCLE.md#8-documentation-requirements-per-release)
+for when training is required by the release process (rule of thumb: only when
+the feature introduces a concept, not just a tool).
+
+## Index
+
+| Topic | Status |
+|---|---|
+| [Cosmetic vs semantic drift](./cosmetic-vs-semantic.md) | Active |
+
+## Template
+
+```markdown
+# <Concept> — Training
+
+## Why this exists
+Two sentences. The mental-model gap this doc closes.
+
+## Prerequisites
+What the reader should already understand or have read.
+
+## The concept
+The actual teaching content. Use diagrams, worked examples, anti-examples.
+Be willing to spend 1000+ words if the concept is load-bearing.
+
+## Worked example
+End-to-end scenario tying the concept to a real tool call.
+
+## Common pitfalls
+Numbered list of mistakes people make and the corrected behaviour.
+
+## See also
+Links to relevant guides, demos, and source files.
+```
+
+## Authoring rules
+
+- Training docs are not release-blocking unless `DEV_CYCLE.md` §8 says so for
+  the specific feature class.
+- One concept per file. If you find yourself splitting into Part 1 / Part 2,
+  the concept is probably two concepts.
+- Reviewers may push back on training that overlaps with an existing guide —
+  guides are the canonical reference; training is supplementary.
diff --git a/events/materializer.py b/events/materializer.py
index cd0bbf24..6ebe90f9 100644
--- a/events/materializer.py
+++ b/events/materializer.py
@@ -91,7 +91,8 @@ async def replay_new_events(self, inner_adapter) -> int:
                         replayed += 1
                     elif etype == "link_commit.completed":
                         await inner_adapter.ingest_commit(
-                            payload.get("commit_hash", ""), payload.get("repo_path", ""),
+                            payload.get("commit_hash", ""),
+                            payload.get("repo_path", ""),
                         )
                         replayed += 1
                     elif etype == "decision_ratified.completed":
diff --git a/events/team_adapter.py b/events/team_adapter.py
index a4ecfae0..3a433e57 100644
--- a/events/team_adapter.py
+++ b/events/team_adapter.py
@@ -8,9 +8,8 @@
 from __future__ import annotations
 
 import logging
-from pathlib import Path
 
-from ledger.queries import find_decision_by_canonical_id, get_canonical_id
+from ledger.queries import get_canonical_id
 
 from .materializer import EventMaterializer
 from .writer import EventFileWriter
@@ -122,13 +121,16 @@ async def bind_decision(
     ) -> dict:
         """Emit bind event, then delegate to inner adapter."""
         await self._ensure_ready()
-        self._writer.write("bind_decision.completed", {
-            "decision_id": decision_id,
-            "file_path": file_path,
-            "symbol_name": symbol_name,
-            "start_line": start_line,
-            "end_line": end_line,
-        })
+        self._writer.write(
+            "bind_decision.completed",
+            {
+                "decision_id": decision_id,
+                "file_path": file_path,
+                "symbol_name": symbol_name,
+                "start_line": start_line,
+                "end_line": end_line,
+            },
+        )
         return await self._inner.bind_decision(
             decision_id=decision_id,
             file_path=file_path,
diff --git a/events/writer.py b/events/writer.py
index fc78965d..6abd159d 100644
--- a/events/writer.py
+++ b/events/writer.py
@@ -17,9 +17,9 @@
 import logging
 import subprocess
 import sys
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from pathlib import Path
-from typing import Any, IO
+from typing import IO, Any
 
 from pydantic import BaseModel, Field
 
@@ -71,10 +71,11 @@ def _unlock(f: IO[bytes]) -> None:
 
 class EventEnvelope(BaseModel):
     """One event line in ``{email}.jsonl``."""
+
     schema_version: int = 2
     event_type: str
     author: str
-    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
+    timestamp: datetime = Field(default_factory=lambda: datetime.now(UTC))
     payload: dict[str, Any] = Field(default_factory=dict)
 
 
@@ -83,7 +84,10 @@ def _get_git_email(repo_path: str | Path) -> str:
     try:
         result = subprocess.run(
             ["git", "config", "user.email"],
-            capture_output=True, text=True, timeout=5, cwd=str(repo_path),
+            capture_output=True,
+            text=True,
+            timeout=5,
+            cwd=str(repo_path),
         )
         email = result.stdout.strip()
         if email:
@@ -117,7 +121,9 @@ def path(self) -> Path:
     def write(self, event_type: str, payload: dict[str, Any]) -> Path:
         """Append one event line. Returns the JSONL file path."""
         envelope = EventEnvelope(
-            event_type=event_type, author=self._author, payload=payload,
+            event_type=event_type,
+            author=self._author,
+            payload=payload,
         )
         line = json.dumps(envelope.model_dump(), separators=(",", ":"), default=str) + "\n"
         with open(self._path, "ab") as f:
diff --git a/handlers/action_hints.py b/handlers/action_hints.py
index 8fc8f2a5..ad0d2bb8 100644
--- a/handlers/action_hints.py
+++ b/handlers/action_hints.py
@@ -41,7 +41,6 @@
     SearchDecisionsResponse,
 )
 
-
 # ── Message variants ───────────────────────────────────────────────
 
 
@@ -127,27 +126,26 @@ def generate_hints_for_search(
 
     drifted = [m for m in response.matches if m.status == "drifted"]
     if drifted:
-        files = sorted({
-            r.file_path
-            for m in drifted
-            for r in m.code_regions
-            if r.file_path
-        })
-        hints.append(ActionHint(
-            kind="review_drift",
-            message=_drift_message(len(drifted), guided_mode),
-            blocking=guided_mode,
-            refs=[m.decision_id for m in drifted] + files,
-        ))
+        files = sorted({r.file_path for m in drifted for r in m.code_regions if r.file_path})
+        hints.append(
+            ActionHint(
+                kind="review_drift",
+                message=_drift_message(len(drifted), guided_mode),
+                blocking=guided_mode,
+                refs=[m.decision_id for m in drifted] + files,
+            )
+        )
 
     ungrounded = [m for m in response.matches if m.status == "ungrounded"]
     if ungrounded:
-        hints.append(ActionHint(
-            kind="ground_decision",
-            message=_ground_message(len(ungrounded), guided_mode),
-            blocking=guided_mode,
-            refs=[m.decision_id for m in ungrounded],
-        ))
+        hints.append(
+            ActionHint(
+                kind="ground_decision",
+                message=_ground_message(len(ungrounded), guided_mode),
+                blocking=guided_mode,
+                refs=[m.decision_id for m in ungrounded],
+            )
+        )
 
     return hints
 
@@ -173,21 +171,25 @@ def generate_hints_for_scan_branch(
         # a symbol but not a file_path directly — fall back to the
         # response-level files_changed list when per-entry file refs
         # aren't available.
-        hints.append(ActionHint(
-            kind="review_drift",
-            message=_drift_message(len(drifted), guided_mode),
-            blocking=guided_mode,
-            refs=[d.decision_id for d in drifted] + response.files_changed,
-        ))
+        hints.append(
+            ActionHint(
+                kind="review_drift",
+                message=_drift_message(len(drifted), guided_mode),
+                blocking=guided_mode,
+                refs=[d.decision_id for d in drifted] + response.files_changed,
+            )
+        )
 
     ungrounded = [d for d in response.decisions if d.status == "ungrounded"]
     if ungrounded:
-        hints.append(ActionHint(
-            kind="ground_decision",
-            message=_ground_message(len(ungrounded), guided_mode),
-            blocking=guided_mode,
-            refs=[d.decision_id for d in ungrounded],
-        ))
+        hints.append(
+            ActionHint(
+                kind="ground_decision",
+                message=_ground_message(len(ungrounded), guided_mode),
+                blocking=guided_mode,
+                refs=[d.decision_id for d in ungrounded],
+            )
+        )
 
     return hints
 
@@ -211,31 +213,34 @@ def generate_hints_from_findings(
     hints: list[ActionHint] = []
 
     if divergences:
-        hints.append(ActionHint(
-            kind="resolve_divergence",
-            message=_divergence_message(len(divergences), guided_mode),
-            blocking=guided_mode,
-            refs=[f"{d.symbol} ({d.file_path})" for d in divergences],
-        ))
+        hints.append(
+            ActionHint(
+                kind="resolve_divergence",
+                message=_divergence_message(len(divergences), guided_mode),
+                blocking=guided_mode,
+                refs=[f"{d.symbol} ({d.file_path})" for d in divergences],
+            )
+        )
 
     if drift_candidates:
-        hints.append(ActionHint(
-            kind="review_drift",
-            message=_drift_message(len(drift_candidates), guided_mode),
-            blocking=guided_mode,
-            refs=[d.decision_id for d in drift_candidates],
-        ))
-
-    open_q_gaps = [
-        g for g in gaps
-        if "open-question" in g.hint or "open question" in g.hint
-    ]
+        hints.append(
+            ActionHint(
+                kind="review_drift",
+                message=_drift_message(len(drift_candidates), guided_mode),
+                blocking=guided_mode,
+                refs=[d.decision_id for d in drift_candidates],
+            )
+        )
+
+    open_q_gaps = [g for g in gaps if "open-question" in g.hint or "open question" in g.hint]
     if open_q_gaps:
-        hints.append(ActionHint(
-            kind="answer_open_questions",
-            message=_open_questions_message(len(open_q_gaps), guided_mode),
-            blocking=guided_mode,
-            refs=[g.description[:140] for g in open_q_gaps],
-        ))
+        hints.append(
+            ActionHint(
+                kind="answer_open_questions",
+                message=_open_questions_message(len(open_q_gaps), guided_mode),
+                blocking=guided_mode,
+                refs=[g.description[:140] for g in open_q_gaps],
+            )
+        )
 
     return hints
diff --git a/handlers/analysis.py b/handlers/analysis.py
index dba8970d..24ce7d22 100644
--- a/handlers/analysis.py
+++ b/handlers/analysis.py
@@ -17,7 +17,6 @@
     DecisionMatch,
 )
 
-
 # ── Divergence detection heuristics ─────────────────────────────────
 
 _NEGATION_PAIRS: list[tuple[str, str]] = [
@@ -39,14 +38,18 @@
 ]
 
 _DIVERGENCE_TOKENS = {
-    " vs ", " vs. ", " or ", "instead of", "rather than",
+    " vs ",
+    " vs. ",
+    " or ",
+    "instead of",
+    "rather than",
 }
 
 
 def _descriptions_conflict(descriptions: list[str]) -> bool:
     lower = [d.lower() for d in descriptions]
     for i, a in enumerate(lower):
-        for b in lower[i + 1:]:
+        for b in lower[i + 1 :]:
             for left, right in _NEGATION_PAIRS:
                 if (left in a and right in b) or (left in b and right in a):
                     return True
@@ -87,8 +90,14 @@ def _detect_divergences(matches: list[DecisionMatch]) -> list[BriefDivergence]:
 # ── Gap extraction heuristic ─────────────────────────────────────────
 
 _OPEN_QUESTION_MARKERS = (
-    "?", " tbd", " tbh", " vs ", " vs. ",
-    "open question", "should we", "which one",
+    "?",
+    " tbd",
+    " tbh",
+    " vs ",
+    " vs. ",
+    "open question",
+    "should we",
+    "which one",
 )
 
 
@@ -102,23 +111,28 @@ def _extract_gaps(matches: list[DecisionMatch]) -> list[BriefGap]:
     gaps: list[BriefGap] = []
     for m in matches:
         if _looks_like_open_question(m.description):
-            gaps.append(BriefGap(
-                description=m.description,
-                hint="open-question phrasing (vs/or/tbd/?)",
-                relevant_source_refs=[m.source_ref] if m.source_ref else [],
-            ))
+            gaps.append(
+                BriefGap(
+                    description=m.description,
+                    hint="open-question phrasing (vs/or/tbd/?)",
+                    relevant_source_refs=[m.source_ref] if m.source_ref else [],
+                )
+            )
             continue
         if m.status == "ungrounded":
-            gaps.append(BriefGap(
-                description=m.description,
-                hint="decision recorded but no code grounding — needs implementation or clarification",
-                relevant_source_refs=[m.source_ref] if m.source_ref else [],
-            ))
+            gaps.append(
+                BriefGap(
+                    description=m.description,
+                    hint="decision recorded but no code grounding — needs implementation or clarification",
+                    relevant_source_refs=[m.source_ref] if m.source_ref else [],
+                )
+            )
     return gaps
 
 
 # ── Shape conversion ─────────────────────────────────────────────────
 
+
 def _to_brief_decision(m: DecisionMatch) -> BriefDecision:
     return BriefDecision(
         decision_id=m.decision_id,
diff --git a/handlers/bind.py b/handlers/bind.py
index c5f91ac1..236d4aae 100644
--- a/handlers/bind.py
+++ b/handlers/bind.py
@@ -1,7 +1,9 @@
 """Handler for bicameral.bind — caller-LLM-driven code region binding."""
 
 from __future__ import annotations
+
 import logging
+
 from contracts import BindResponse, BindResult, PendingComplianceCheck, SyncMetrics
 from handlers.sync_middleware import repo_write_barrier
 
@@ -48,6 +50,7 @@ async def _do_bind(ctx, bindings: list[dict]) -> BindResponse:
     effective_ref = authoritative_sha
     if head_sha and head_sha not in ("HEAD", ""):
         from handlers.link_commit import _is_ephemeral_commit
+
         if _is_ephemeral_commit(head_sha, repo, authoritative_ref):
             effective_ref = head_sha
 
@@ -62,46 +65,68 @@ async def _do_bind(ctx, bindings: list[dict]) -> BindResponse:
         purpose = str(b.get("purpose") or "")
 
         if not decision_id or not file_path or not symbol_name:
-            results.append(BindResult(
-                decision_id=decision_id, region_id="", content_hash="",
-                error="decision_id, file_path, and symbol_name are required",
-            ))
+            results.append(
+                BindResult(
+                    decision_id=decision_id,
+                    region_id="",
+                    content_hash="",
+                    error="decision_id, file_path, and symbol_name are required",
+                )
+            )
             continue
 
         try:
             exists = await ledger.decision_exists(decision_id)
         except Exception as exc:
-            results.append(BindResult(
-                decision_id=decision_id, region_id="", content_hash="",
-                error=f"decision lookup failed: {exc}",
-            ))
+            results.append(
+                BindResult(
+                    decision_id=decision_id,
+                    region_id="",
+                    content_hash="",
+                    error=f"decision lookup failed: {exc}",
+                )
+            )
             continue
 
         if not exists:
-            results.append(BindResult(
-                decision_id=decision_id, region_id="", content_hash="",
-                error=f"unknown_decision_id: {decision_id}",
-            ))
+            results.append(
+                BindResult(
+                    decision_id=decision_id,
+                    region_id="",
+                    content_hash="",
+                    error=f"unknown_decision_id: {decision_id}",
+                )
+            )
             continue
 
         if start_line is None or end_line is None:
             from ledger.status import resolve_symbol_lines
+
             resolved = resolve_symbol_lines(file_path, symbol_name, repo, ref=effective_ref)
             if resolved is None:
-                results.append(BindResult(
-                    decision_id=decision_id, region_id="", content_hash="",
-                    error=f"symbol '{symbol_name}' not found in {file_path} at {effective_ref}",
-                ))
+                results.append(
+                    BindResult(
+                        decision_id=decision_id,
+                        region_id="",
+                        content_hash="",
+                        error=f"symbol '{symbol_name}' not found in {file_path} at {effective_ref}",
+                    )
+                )
                 continue
             start_line, end_line = resolved
         else:
             start_line, end_line = int(start_line), int(end_line)
             from ledger.status import get_git_content
+
             if get_git_content(file_path, 1, 1, repo, ref=effective_ref) is None:
-                results.append(BindResult(
-                    decision_id=decision_id, region_id="", content_hash="",
-                    error=f"file '{file_path}' does not exist at {effective_ref} — only bind to existing code, never hypothetical files",
-                ))
+                results.append(
+                    BindResult(
+                        decision_id=decision_id,
+                        region_id="",
+                        content_hash="",
+                        error=f"file '{file_path}' does not exist at {effective_ref} — only bind to existing code, never hypothetical files",
+                    )
+                )
                 continue
 
         try:
@@ -117,10 +142,14 @@ async def _do_bind(ctx, bindings: list[dict]) -> BindResponse:
             )
         except Exception as exc:
             logger.warning("[bind] bind_decision failed: %s", exc)
-            results.append(BindResult(
-                decision_id=decision_id, region_id="", content_hash="",
-                error=str(exc),
-            ))
+            results.append(
+                BindResult(
+                    decision_id=decision_id,
+                    region_id="",
+                    content_hash="",
+                    error=str(exc),
+                )
+            )
             continue
 
         region_id = bind_result["region_id"]
@@ -151,11 +180,13 @@ async def _do_bind(ctx, bindings: list[dict]) -> BindResponse:
             except Exception as exc:
                 logger.warning(
                     "[bind] decision_level lookup failed for %s: %s — skipping codegenome write",
-                    decision_id, exc,
+                    decision_id,
+                    exc,
                 )
                 level = None  # treat lookup failure as "skip" — safer than over-writing
             if level == "L2":
                 from codegenome.bind_service import write_codegenome_identity
+
                 try:
                     await write_codegenome_identity(
                         ledger=ledger,
@@ -172,12 +203,14 @@ async def _do_bind(ctx, bindings: list[dict]) -> BindResponse:
                 except Exception as exc:
                     logger.warning(
                         "[bind] codegenome identity write failed for %s: %s",
-                        decision_id, exc,
+                        decision_id,
+                        exc,
                     )
             else:
                 logger.debug(
                     "[bind] L1 exemption — skipping codegenome write for %s (decision_level=%r)",
-                    decision_id, level,
+                    decision_id,
+                    level,
                 )
 
         pending_check = None
@@ -196,15 +229,18 @@ async def _do_bind(ctx, bindings: list[dict]) -> BindResponse:
                 content_hash=content_hash,
             )
 
-        results.append(BindResult(
-            decision_id=decision_id,
-            region_id=region_id,
-            content_hash=content_hash,
-            pending_compliance_check=pending_check,
-        ))
+        results.append(
+            BindResult(
+                decision_id=decision_id,
+                region_id=region_id,
+                content_hash=content_hash,
+                pending_compliance_check=pending_check,
+            )
+        )
 
     try:
         from dashboard.server import notify_dashboard
+
         await notify_dashboard(ctx)
     except Exception:
         pass
diff --git a/handlers/decision_status.py b/handlers/decision_status.py
index 68a06179..23f701a0 100644
--- a/handlers/decision_status.py
+++ b/handlers/decision_status.py
@@ -7,7 +7,7 @@
 from __future__ import annotations
 
 import logging
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 
 from contracts import CodeRegionSummary, DecisionStatusEntry, DecisionStatusResponse
 
@@ -23,6 +23,7 @@ async def handle_decision_status(
     # Auto-sync to HEAD so status reflects current code state
     try:
         from handlers.link_commit import handle_link_commit
+
         await handle_link_commit(ctx, ref)
     except Exception as exc:
         logger.warning("[status] auto-sync failed: %s", exc)
@@ -50,26 +51,28 @@ async def handle_decision_status(
         ]
 
         _signoff = d.get("signoff") or {}
-        entries.append(DecisionStatusEntry(
-            decision_id=d["decision_id"],
-            description=d["description"],
-            status=status,
-            signoff_state=(_signoff.get("state") if isinstance(_signoff, dict) else None),
-            source_type=d.get("source_type", ""),
-            source_ref=d.get("source_ref", ""),
-            ingested_at=d.get("ingested_at", ""),
-            code_regions=regions,
-            drift_evidence=d.get("drift_evidence", ""),
-            blast_radius=d.get("blast_radius", []),
-            source_excerpt=d.get("source_excerpt", ""),
-            meeting_date=d.get("meeting_date", ""),
-            speakers=d.get("speakers", []),
-            signoff=d.get("signoff"),
-        ))
+        entries.append(
+            DecisionStatusEntry(
+                decision_id=d["decision_id"],
+                description=d["description"],
+                status=status,
+                signoff_state=(_signoff.get("state") if isinstance(_signoff, dict) else None),
+                source_type=d.get("source_type", ""),
+                source_ref=d.get("source_ref", ""),
+                ingested_at=d.get("ingested_at", ""),
+                code_regions=regions,
+                drift_evidence=d.get("drift_evidence", ""),
+                blast_radius=d.get("blast_radius", []),
+                source_excerpt=d.get("source_excerpt", ""),
+                meeting_date=d.get("meeting_date", ""),
+                speakers=d.get("speakers", []),
+                signoff=d.get("signoff"),
+            )
+        )
 
     return DecisionStatusResponse(
         ref=ref,
-        as_of=datetime.now(timezone.utc).isoformat(),
+        as_of=datetime.now(UTC).isoformat(),
         summary=summary,
         decisions=entries,
     )
diff --git a/handlers/detect_drift.py b/handlers/detect_drift.py
index 05341811..5045aa1f 100644
--- a/handlers/detect_drift.py
+++ b/handlers/detect_drift.py
@@ -42,7 +42,7 @@ def _resolve_subjects_eligible(decision: dict) -> bool:
     """
     level = decision.get("decision_level")
     if level is None:
-        return True   # pre-v0.9.3 decisions: eligible by default for backward compat
+        return True  # pre-v0.9.3 decisions: eligible by default for backward compat
     return level == "L2"
 
 
@@ -73,18 +73,20 @@ def raw_decisions_to_drift_entries(
             counts["ungrounded"] += 1
 
         _signoff = d.get("signoff") or {}
-        entries.append(DriftEntry(
-            decision_id=d["decision_id"],
-            description=d["description"],
-            status=status,
-            signoff_state=(_signoff.get("state") if isinstance(_signoff, dict) else None),
-            symbol=region.get("symbol", ""),
-            lines=tuple(region.get("lines", (0, 0))),
-            drift_evidence=drift_evidence,
-            source_ref=d.get("source_ref", ""),
-            source_excerpt=d.get("source_excerpt", ""),
-            meeting_date=d.get("meeting_date", ""),
-        ))
+        entries.append(
+            DriftEntry(
+                decision_id=d["decision_id"],
+                description=d["description"],
+                status=status,
+                signoff_state=(_signoff.get("state") if isinstance(_signoff, dict) else None),
+                symbol=region.get("symbol", ""),
+                lines=tuple(region.get("lines", (0, 0))),
+                drift_evidence=drift_evidence,
+                source_ref=d.get("source_ref", ""),
+                source_excerpt=d.get("source_excerpt", ""),
+                meeting_date=d.get("meeting_date", ""),
+            )
+        )
 
     return entries, counts
 
@@ -101,12 +103,8 @@ async def handle_detect_drift(
     if os.getenv("USE_REAL_CODE_LOCATOR", "0") == "1":
         abs_path = str((Path(ctx.repo_path) / file_path).resolve())
         all_symbols = await ctx.code_graph.extract_symbols(abs_path)
-        decision_symbols = {
-            d.get("code_region", {}).get("symbol", "") for d in raw_decisions
-        }
-        undocumented = [
-            s["name"] for s in all_symbols if s["name"] not in decision_symbols
-        ]
+        decision_symbols = {d.get("code_region", {}).get("symbol", "") for d in raw_decisions}
+        undocumented = [s["name"] for s in all_symbols if s["name"] not in decision_symbols]
     else:
         undocumented = await ctx.ledger.get_undocumented_symbols(file_path)
 
@@ -188,7 +186,12 @@ def _enrich_with_cosmetic_hints(
             head_range = resolve_symbol_lines(file_path, entry.symbol, repo_path, ref="HEAD")
             wt_range = resolve_symbol_lines(file_path, entry.symbol, repo_path, ref="working_tree")
         except Exception as exc:
-            logger.debug("[detect_drift] resolve_symbol_lines failed for %s/%s: %s", file_path, entry.symbol, exc)
+            logger.debug(
+                "[detect_drift] resolve_symbol_lines failed for %s/%s: %s",
+                file_path,
+                entry.symbol,
+                exc,
+            )
             continue
         if head_range is None or wt_range is None:
             continue  # symbol absent at one side — not a cosmetic case
@@ -200,8 +203,8 @@ def _enrich_with_cosmetic_hints(
         if wt_start <= 0 or wt_end < wt_start:
             continue
 
-        head_slice = "\n".join(head_lines[head_start - 1:head_end])
-        wt_slice = "\n".join(wt_lines[wt_start - 1:wt_end])
+        head_slice = "\n".join(head_lines[head_start - 1 : head_end])
+        wt_slice = "\n".join(wt_lines[wt_start - 1 : wt_end])
         if not head_slice or not wt_slice:
             continue
         if head_slice == wt_slice:
diff --git a/handlers/gap_judge.py b/handlers/gap_judge.py
index ba32a52c..15026ca5 100644
--- a/handlers/gap_judge.py
+++ b/handlers/gap_judge.py
@@ -28,7 +28,7 @@
 from __future__ import annotations
 
 import logging
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 
 from contracts import (
     DecisionMatch,
@@ -291,7 +291,7 @@ async def handle_judge_gaps(
 
     return GapJudgmentPayload(
         topic=topic,
-        as_of=datetime.now(timezone.utc).isoformat(),
+        as_of=datetime.now(UTC).isoformat(),
         decisions=context_decisions,
         phrasing_gaps=phrasing_gaps,
         rubric=_build_rubric(),
diff --git a/handlers/history.py b/handlers/history.py
index 67c96071..852af509 100644
--- a/handlers/history.py
+++ b/handlers/history.py
@@ -50,7 +50,6 @@ def _slugify(name: str) -> str:
     return slug.strip("-") or "uncategorized"
 
 
-
 def _decision_status_for_history(
     decision_status: str,
     has_code_regions: bool,
@@ -86,14 +85,16 @@ def _row_to_history_decision(
         if not r:
             continue
         symbol = r.get("symbol") or r.get("symbol_name") or None
-        fulfillments.append(HistoryFulfillment(
-            file_path=str(r.get("file_path") or ""),
-            symbol=symbol,
-            start_line=int(r.get("start_line") or 0),
-            end_line=int(r.get("end_line") or 0),
-            baseline_hash=r.get("content_hash") or None,
-            current_hash=r.get("content_hash") or None,
-        ))
+        fulfillments.append(
+            HistoryFulfillment(
+                file_path=str(r.get("file_path") or ""),
+                symbol=symbol,
+                start_line=int(r.get("start_line") or 0),
+                end_line=int(r.get("end_line") or 0),
+                baseline_hash=r.get("content_hash") or None,
+                current_hash=r.get("content_hash") or None,
+            )
+        )
 
     # Source spans → HistorySource list
     # get_all_decisions returns source_excerpt + meeting_date extracted from first span.
@@ -111,13 +112,15 @@ def _row_to_history_decision(
             raw_type = str(span.get("source_type") or row.get("source_type") or "manual")
             speakers = span.get("speakers") or []
             speaker = speakers[0] if speakers else None
-            sources.append(HistorySource(
-                source_ref=str(span.get("source_ref") or row.get("source_ref") or ""),
-                source_type=_normalize_source_type(raw_type),  # type: ignore[arg-type]
-                date=str(span.get("meeting_date") or row.get("meeting_date") or ""),
-                speaker=speaker,
-                quote=text,
-            ))
+            sources.append(
+                HistorySource(
+                    source_ref=str(span.get("source_ref") or row.get("source_ref") or ""),
+                    source_type=_normalize_source_type(raw_type),  # type: ignore[arg-type]
+                    date=str(span.get("meeting_date") or row.get("meeting_date") or ""),
+                    speaker=speaker,
+                    quote=text,
+                )
+            )
     else:
         # Fallback: build a single source from denormalized columns
         source_excerpt = str(row.get("source_excerpt") or "")
@@ -125,13 +128,15 @@ def _row_to_history_decision(
         source_type = str(row.get("source_type") or "manual")
         meeting_date = str(row.get("meeting_date") or "")
         if source_excerpt or source_ref:
-            sources.append(HistorySource(
-                source_ref=source_ref,
-                source_type=_normalize_source_type(source_type),  # type: ignore[arg-type]
-                date=meeting_date,
-                speaker=None,
-                quote=source_excerpt or description,
-            ))
+            sources.append(
+                HistorySource(
+                    source_ref=source_ref,
+                    source_type=_normalize_source_type(source_type),  # type: ignore[arg-type]
+                    date=meeting_date,
+                    speaker=None,
+                    quote=source_excerpt or description,
+                )
+            )
 
     drift_evidence: str | None = row.get("drift_evidence") or None
     signoff: dict | None = row.get("signoff") or None
@@ -212,7 +217,7 @@ async def _fetch_all_decisions_enriched(ledger) -> list[dict]:
     for row in rows:
         ca = row.pop("created_at", None)
         row.setdefault("ingested_at", str(ca)[:24] if ca else "")
-        for region in (row.get("code_regions") or []):
+        for region in row.get("code_regions") or []:
             if region and "symbol_name" in region:
                 region["symbol"] = region.pop("symbol_name")
 
@@ -289,13 +294,13 @@ async def handle_history(
     """
     # V1 A3: time the catch-up locally so history can report it.
     import time as _time
-    from handlers.sync_middleware import ensure_ledger_synced
+
     from contracts import SyncMetrics
+    from handlers.sync_middleware import ensure_ledger_synced
+
     _t0 = _time.perf_counter()
     await ensure_ledger_synced(ctx)
-    sync_metrics = SyncMetrics(
-        sync_catchup_ms=round((_time.perf_counter() - _t0) * 1000, 3)
-    )
+    sync_metrics = SyncMetrics(sync_catchup_ms=round((_time.perf_counter() - _t0) * 1000, 3))
 
     ledger = ctx.ledger
     if hasattr(ledger, "connect"):
@@ -326,11 +331,13 @@ async def handle_history(
         if not decisions:
             continue
 
-        features.append(HistoryFeature(
-            id=feature_id,
-            name=feature_name,
-            decisions=decisions,
-        ))
+        features.append(
+            HistoryFeature(
+                id=feature_id,
+                name=feature_name,
+                decisions=decisions,
+            )
+        )
 
     # Apply feature_filter
     if feature_filter:
@@ -351,8 +358,7 @@ async def handle_history(
     # Mark decisions whose current compliance verdict came from a feature-branch commit.
     # Only meaningful for decisions that have a status verdict (reflected/drifted).
     verifiable_ids = [
-        d.id for f in features for d in f.decisions
-        if d.status in ("reflected", "drifted")
+        d.id for f in features for d in f.decisions if d.status in ("reflected", "drifted")
     ]
     ephemeral_ids = await _fetch_ephemeral_decision_ids(ledger, verifiable_ids)
     if ephemeral_ids:
diff --git a/handlers/ingest.py b/handlers/ingest.py
index a6bfb781..449038b9 100644
--- a/handlers/ingest.py
+++ b/handlers/ingest.py
@@ -7,6 +7,7 @@
 from __future__ import annotations
 
 import logging
+from datetime import UTC
 
 from contracts import (
     ContextForCandidate,
@@ -72,13 +73,15 @@ def _normalize_payload(payload: dict) -> dict:
         # committed to them, no code implements them. signoff.discovered=true
         # marks them as AI-discovered so consumers can distinguish them from
         # explicitly ingested decisions without a description prefix hack.
-        mappings.append({
-            "intent": q,
-            "span": {**source_meta, "text": ""},
-            "symbols": [],
-            "code_regions": [],
-            "signoff": {"state": "proposed", "discovered": True},
-        })
+        mappings.append(
+            {
+                "intent": q,
+                "span": {**source_meta, "text": ""},
+                "symbols": [],
+                "code_regions": [],
+                "signoff": {"state": "proposed", "discovered": True},
+            }
+        )
 
     if not mappings:
         logger.warning(
@@ -191,12 +194,14 @@ async def _find_context_for_candidates(
                 if pair in seen_pairs:
                     continue
                 seen_pairs.add(pair)
-                candidates.append(ContextForCandidate(
-                    span_id=span_id,
-                    decision_id=decision_id,
-                    decision_description=m.get("description", ""),
-                    overlap_score=float(m.get("overlap_score", 0.0)),
-                ))
+                candidates.append(
+                    ContextForCandidate(
+                        span_id=span_id,
+                        decision_id=decision_id,
+                        decision_description=m.get("description", ""),
+                        overlap_score=float(m.get("overlap_score", 0.0)),
+                    )
+                )
                 if len(candidates) >= top_k:
                     return candidates
         except Exception as exc:
@@ -229,14 +234,16 @@ async def handle_ingest(
         if span.get("source_type") in _SESSION_SOURCE_TYPES and not span.get("speakers"):
             if _git_email_cache is None:
                 from events.writer import _get_git_email
+
                 _git_email_cache = _get_git_email(ctx.repo_path)
             if _git_email_cache and _git_email_cache != "unknown":
                 span["speakers"] = [_git_email_cache]
 
     payload = ctx.code_graph.resolve_symbols(payload)
 
-    from datetime import datetime, timezone
-    _now_iso = datetime.now(timezone.utc).isoformat()
+    from datetime import datetime
+
+    _now_iso = datetime.now(UTC).isoformat()
     _session_id = getattr(ctx, "session_id", None) or ""
 
     # v0.7.0: every new ingest enters as 'proposed' by default.
@@ -262,7 +269,10 @@ async def handle_ingest(
             "(HEAD=%s); baseline hashes will be stamped against %s so the "
             "ledger stays branch-independent. Switch to %s if you want "
             "baselines pinned to the current working tree.",
-            authoritative_ref, head_sha[:8], authoritative_ref, authoritative_ref,
+            authoritative_ref,
+            head_sha[:8],
+            authoritative_ref,
+            authoritative_ref,
         )
 
     # v0.4.8: writes always invalidate the within-call sync cache. In the
@@ -272,6 +282,7 @@ async def handle_ingest(
     # then writes would leave a stale cache covering post-write reads.
     try:
         from handlers.link_commit import handle_link_commit, invalidate_sync_cache
+
         invalidate_sync_cache(ctx)
     except Exception:
         pass
@@ -305,6 +316,7 @@ async def handle_ingest(
         topics = _derive_topics(payload)
         if topics:
             from handlers.gap_judge import handle_judge_gaps
+
             for topic in topics:
                 jp = await handle_judge_gaps(ctx, topic=topic)
                 if jp is not None:
@@ -314,7 +326,9 @@ async def handle_ingest(
     judgment_payload = judgment_payloads[0] if judgment_payloads else None
 
     cursor_summary = None
-    source_type = str(((payload.get("mappings") or [{}])[0].get("span") or {}).get("source_type", "manual"))
+    source_type = str(
+        ((payload.get("mappings") or [{}])[0].get("span") or {}).get("source_type", "manual")
+    )
     last_source_ref = _derive_last_source_ref(payload)
     if hasattr(ledger, "upsert_source_cursor"):
         cursor_row = await ledger.upsert_source_cursor(
@@ -370,8 +384,7 @@ async def handle_ingest(
             for d in result.get("created_decisions", [])
         ],
         pending_grounding_decisions=[
-            d for d in result.get("ungrounded_decisions", [])
-            if d.get("decision_level") != "L1"
+            d for d in result.get("ungrounded_decisions", []) if d.get("decision_level") != "L1"
         ],
         context_for_candidates=context_for_candidates,
         source_cursor=cursor_summary,
@@ -382,6 +395,7 @@ async def handle_ingest(
 
     try:
         from dashboard.server import notify_dashboard
+
         await notify_dashboard(ctx)
     except Exception:
         pass
diff --git a/handlers/link_commit.py b/handlers/link_commit.py
index 9c37b06f..56c8509b 100644
--- a/handlers/link_commit.py
+++ b/handlers/link_commit.py
@@ -109,6 +109,7 @@ def _build_verification_instruction(
         parts.append(_GROUNDING_INSTRUCTION_RELOCATION)
     return "".join(parts)
 
+
 logger = logging.getLogger(__name__)
 
 
@@ -125,6 +126,7 @@ def _read_current_head_sha(repo_path: str) -> str:
     """
     try:
         import subprocess
+
         result = subprocess.run(
             ["git", "rev-parse", "HEAD"],
             cwd=repo_path,
@@ -230,6 +232,7 @@ def invalidate_sync_cache(ctx) -> None:
         sync_state.pop("last_sync_response", None)
         sync_state.pop("pending_flow_id", None)
     from handlers.sync_middleware import invalidate_process_cache
+
     invalidate_process_cache()
 
 
@@ -252,7 +255,8 @@ async def handle_link_commit(ctx, commit_hash: str = "HEAD") -> LinkCommitRespon
     try:
         if hasattr(ctx.ledger, "backfill_empty_hashes"):
             await ctx.ledger.backfill_empty_hashes(
-                ctx.repo_path, drift_analyzer=ctx.drift_analyzer,
+                ctx.repo_path,
+                drift_analyzer=ctx.drift_analyzer,
             )
     except Exception as exc:
         logger.warning("[link_commit] backfill failed: %s", exc)
@@ -281,9 +285,7 @@ async def handle_link_commit(ctx, commit_hash: str = "HEAD") -> LinkCommitRespon
 
     has_action_items = bool(pending) or bool(pending_grounding_raw)
     verification_text = (
-        _build_verification_instruction(pending, pending_grounding_raw)
-        if has_action_items
-        else ""
+        _build_verification_instruction(pending, pending_grounding_raw) if has_action_items else ""
     )
 
     is_ephemeral = _is_ephemeral_commit(
@@ -318,6 +320,7 @@ async def handle_link_commit(ctx, commit_hash: str = "HEAD") -> LinkCommitRespon
 
     try:
         from dashboard.server import notify_dashboard
+
         await notify_dashboard(ctx)
     except Exception:
         pass
diff --git a/handlers/preflight.py b/handlers/preflight.py
index ec2ac5ec..147b65b7 100644
--- a/handlers/preflight.py
+++ b/handlers/preflight.py
@@ -31,20 +31,16 @@
 import logging
 import os
 import time
-from datetime import datetime, timezone
 from pathlib import Path
 
 from contracts import (
-    ActionHint,
     BriefDecision,
-    BriefDivergence,
-    BriefGap,
     CodeRegionSummary,
     DecisionMatch,
     PreflightResponse,
 )
-from handlers.analysis import _to_brief_decision
 from handlers.action_hints import generate_hints_from_findings
+from handlers.analysis import _to_brief_decision
 
 logger = logging.getLogger(__name__)
 
@@ -76,22 +72,86 @@ def _should_show_product_stage() -> bool:
     except Exception:
         return False
 
-_GENERIC_TOPICS = frozenset({
-    "code", "project", "everything", "anything", "stuff",
-    "thing", "things", "feature", "features", "system",
-    "module", "function", "method",
-})
 
-_STOPWORDS = frozenset({
-    "the", "and", "for", "that", "this", "with", "are", "from", "have",
-    "will", "when", "then", "been", "also", "into", "about", "should",
-    "must", "need", "each", "they", "their", "there", "which", "where",
-    "what", "than", "some", "more", "such", "only", "very", "just",
-    "like", "make", "made", "use", "used", "using", "after", "before",
-    "over", "under", "between", "through", "against", "implement",
-    "build", "create", "modify", "refactor", "update", "change", "fix",
-    "edit", "remove", "delete",
-})
+_GENERIC_TOPICS = frozenset(
+    {
+        "code",
+        "project",
+        "everything",
+        "anything",
+        "stuff",
+        "thing",
+        "things",
+        "feature",
+        "features",
+        "system",
+        "module",
+        "function",
+        "method",
+    }
+)
+
+_STOPWORDS = frozenset(
+    {
+        "the",
+        "and",
+        "for",
+        "that",
+        "this",
+        "with",
+        "are",
+        "from",
+        "have",
+        "will",
+        "when",
+        "then",
+        "been",
+        "also",
+        "into",
+        "about",
+        "should",
+        "must",
+        "need",
+        "each",
+        "they",
+        "their",
+        "there",
+        "which",
+        "where",
+        "what",
+        "than",
+        "some",
+        "more",
+        "such",
+        "only",
+        "very",
+        "just",
+        "like",
+        "make",
+        "made",
+        "use",
+        "used",
+        "using",
+        "after",
+        "before",
+        "over",
+        "under",
+        "between",
+        "through",
+        "against",
+        "implement",
+        "build",
+        "create",
+        "modify",
+        "refactor",
+        "update",
+        "change",
+        "fix",
+        "edit",
+        "remove",
+        "delete",
+    }
+)
 
 
 def _content_tokens(text: str) -> set[str]:
@@ -99,6 +159,7 @@ def _content_tokens(text: str) -> set[str]:
     shape but with implementation verbs added to the stopword set so
     'implement Stripe webhook' yields ['stripe', 'webhook']."""
     import re
+
     raw = re.findall(r"[A-Za-z]{4,}", text or "")
     return {t.lower() for t in raw if t.lower() not in _STOPWORDS}
 
@@ -153,16 +214,26 @@ def _check_dedup(ctx, topic: str) -> bool:
 async def _region_anchored_preflight(
     ctx,
     file_paths: list[str],
-) -> list[DecisionMatch]:
+) -> tuple[list[DecisionMatch], bool]:
     """file_paths (caller-supplied) → decisions pinned to those regions.
 
     The caller LLM is responsible for resolving which files a proposed change
     will touch — preflight then looks up decisions pinned to those files in
-    the ledger. Returns DecisionMatch objects with confidence=0.9 (direct
-    pin, not keyword match).
+    the ledger. Before the lookup, run a 1-hop code-graph expansion via the
+    code-locator adapter (#173): caller-LLM discovery is imprecise, and a
+    decision bound to ``app/src/lib/git/reorder.ts`` should still surface when
+    the caller passes ``app/src/ui/multi-commit-operation/reorder.tsx``, a
+    structurally-near file. Expansion is deterministic, no LLM in the path,
+    bounded by ``code_locator/config.py::max_neighbors_per_result``.
+
+    Returns ``(matches, expanded)`` where ``expanded`` is True iff at least
+    one returned match was classified as surfaced only via graph expansion
+    (not bound to a caller-supplied path), so the caller can record
+    ``"graph"`` in ``sources_chained``. Direct-pin matches carry
+    ``confidence=0.9``; expansion-only matches carry ``confidence=0.7``.
     """
     if not file_paths:
-        return []
+        return [], False
 
     # Dedup + normalize while preserving caller-supplied order.
     seen_paths: set[str] = set()
@@ -173,16 +244,34 @@ async def _region_anchored_preflight(
             seen_paths.add(fp)
             ordered.append(fp)
     if not ordered:
-        return []
+        return [], False
+
+    # Graph expansion. Defensive: code_graph may be absent (mock contexts) or
+    # the adapter may not implement the method (older deployments). Either
+    # case falls back to direct file_paths only.
+    direct_paths: set[str] = set(ordered)
+    expanded_paths = list(ordered)
+    expanded_only_paths: set[str] = set()
+    code_graph = getattr(ctx, "code_graph", None)
+    expander = getattr(code_graph, "expand_file_paths_via_graph", None) if code_graph else None
+    if expander is not None:
+        try:
+            expanded_paths, added_paths = expander(ordered, hops=1)
+            expanded_only_paths = set(added_paths)
+        except Exception as exc:
+            logger.debug("[preflight:region] graph expansion failed: %s", exc)
+            expanded_paths = list(ordered)
+            expanded_only_paths = set()
 
     try:
-        raw = await ctx.ledger.get_decisions_for_files(ordered)
+        raw = await ctx.ledger.get_decisions_for_files(expanded_paths)
     except Exception as exc:
         logger.debug("[preflight:region] ledger region lookup failed: %s", exc)
-        return []
+        return [], False
 
     matches: list[DecisionMatch] = []
     seen_ids: set[str] = set()
+    surfaced_via_expansion = False
     for d in raw:
         did = d.get("decision_id", "")
         if did in seen_ids:
@@ -191,35 +280,55 @@ async def _region_anchored_preflight(
         region_dict = d.get("code_region")
         regions = []
         if region_dict:
-            regions = [CodeRegionSummary(
-                file_path=region_dict.get("file_path", ""),
-                symbol=region_dict.get("symbol", ""),
-                lines=tuple(region_dict.get("lines", (0, 0))),
-                purpose=region_dict.get("purpose", ""),
-            )]
+            regions = [
+                CodeRegionSummary(
+                    file_path=region_dict.get("file_path", ""),
+                    symbol=region_dict.get("symbol", ""),
+                    lines=tuple(region_dict.get("lines", (0, 0))),
+                    purpose=region_dict.get("purpose", ""),
+                )
+            ]
 
         status = str(d.get("status") or "ungrounded")
         if status not in ("reflected", "drifted", "pending", "ungrounded"):
             status = "ungrounded" if not regions else "pending"
 
-        _sf = d.get("signoff") or {}
-        matches.append(DecisionMatch(
-            decision_id=d.get("decision_id", ""),
-            description=d.get("description", ""),
-            status=status,
-            signoff_state=(_sf.get("state") if isinstance(_sf, dict) else None),
-            confidence=0.9,
-            source_ref=d.get("source_ref", ""),
-            code_regions=regions,
-            drift_evidence="",
-            related_constraints=[],
-            source_excerpt=d.get("source_excerpt", ""),
-            meeting_date=d.get("meeting_date", ""),
-            signoff=d.get("signoff"),
-        ))
+        # Provenance: a decision is "directly pinned" if any of its bound
+        # code_regions live in a caller-supplied path; otherwise it was only
+        # reached via 1-hop graph expansion. Caller can de-prioritize the
+        # latter (lower confidence) without losing recall.
+        bound_paths = {
+            (r.get("file_path") or "").strip()
+            for r in (d.get("code_regions") or [])
+            if r and (r.get("file_path") or "").strip()
+        }
+        # Single-region decisions also have a top-level ``code_region`` (used
+        # above); include it in the provenance check.
+        if region_dict and (region_dict.get("file_path") or "").strip():
+            bound_paths.add(region_dict["file_path"].strip())
+        is_direct = bool(bound_paths & direct_paths) if bound_paths else not expanded_only_paths
+        if not is_direct:
+            surfaced_via_expansion = True
 
-    return matches
+        _sf = d.get("signoff") or {}
+        matches.append(
+            DecisionMatch(
+                decision_id=d.get("decision_id", ""),
+                description=d.get("description", ""),
+                status=status,
+                signoff_state=(_sf.get("state") if isinstance(_sf, dict) else None),
+                confidence=0.9 if is_direct else 0.7,
+                source_ref=d.get("source_ref", ""),
+                code_regions=regions,
+                drift_evidence="",
+                related_constraints=[],
+                source_excerpt=d.get("source_excerpt", ""),
+                meeting_date=d.get("meeting_date", ""),
+                signoff=d.get("signoff"),
+            )
+        )
 
+    return matches, surfaced_via_expansion
 
 
 async def handle_preflight(
@@ -233,7 +342,10 @@ async def handle_preflight(
 
     # Explicit mute via env var — one-line off-switch for the session.
     if os.getenv("BICAMERAL_PREFLIGHT_MUTE", "").strip().lower() in (
-        "1", "true", "yes", "on",
+        "1",
+        "true",
+        "yes",
+        "on",
     ):
         return PreflightResponse(
             topic=topic,
@@ -254,13 +366,13 @@ async def handle_preflight(
 
     # V1 A3: time the call locally so the metric reflects THIS handler's catch-up.
     import time as _time
-    from handlers.sync_middleware import ensure_ledger_synced
+
     from contracts import SyncMetrics
+    from handlers.sync_middleware import ensure_ledger_synced
+
     _t0 = _time.perf_counter()
     await ensure_ledger_synced(ctx)
-    sync_metrics = SyncMetrics(
-        sync_catchup_ms=round((_time.perf_counter() - _t0) * 1000, 3)
-    )
+    sync_metrics = SyncMetrics(sync_catchup_ms=round((_time.perf_counter() - _t0) * 1000, 3))
 
     sources_chained: list[str] = []
 
@@ -271,9 +383,11 @@ async def handle_preflight(
     region_matches: list[DecisionMatch] = []
     if file_paths:
         try:
-            region_matches = await _region_anchored_preflight(ctx, file_paths)
+            region_matches, used_graph_expansion = await _region_anchored_preflight(ctx, file_paths)
             if region_matches:
                 sources_chained.append("region")
+                if used_graph_expansion:
+                    sources_chained.append("graph")
         except Exception as exc:
             logger.debug("[preflight] region lookup failed: %s", exc)
 
@@ -285,28 +399,33 @@ async def handle_preflight(
     context_pending_ready: list[BriefDecision] = []
     try:
         from ledger.queries import get_collision_pending_decisions, get_context_for_ready_decisions
+
         inner = getattr(ctx.ledger, "_inner", ctx.ledger)
         client = inner._client
         coll_rows = await get_collision_pending_decisions(client)
         for r in coll_rows:
             _sf = r.get("signoff") or {}
-            unresolved_collisions.append(BriefDecision(
-                decision_id=r["decision_id"],
-                description=r["description"],
-                status=r.get("status") or "ungrounded",
-                signoff_state=(_sf.get("state") if isinstance(_sf, dict) else None),
-                signoff=r.get("signoff"),
-            ))
+            unresolved_collisions.append(
+                BriefDecision(
+                    decision_id=r["decision_id"],
+                    description=r["description"],
+                    status=r.get("status") or "ungrounded",
+                    signoff_state=(_sf.get("state") if isinstance(_sf, dict) else None),
+                    signoff=r.get("signoff"),
+                )
+            )
         ctx_rows = await get_context_for_ready_decisions(client)
         for r in ctx_rows:
             _sf = r.get("signoff") or {}
-            context_pending_ready.append(BriefDecision(
-                decision_id=r["decision_id"],
-                description=r["description"],
-                status=r.get("status") or "ungrounded",
-                signoff_state=(_sf.get("state") if isinstance(_sf, dict) else None),
-                signoff=r.get("signoff"),
-            ))
+            context_pending_ready.append(
+                BriefDecision(
+                    decision_id=r["decision_id"],
+                    description=r["description"],
+                    status=r.get("status") or "ungrounded",
+                    signoff_state=(_sf.get("state") if isinstance(_sf, dict) else None),
+                    signoff=r.get("signoff"),
+                )
+            )
     except Exception as exc:
         logger.debug("[preflight] HITL annotation queries failed: %s", exc)
 
diff --git a/handlers/ratify.py b/handlers/ratify.py
index cf8a7c4a..32594690 100644
--- a/handlers/ratify.py
+++ b/handlers/ratify.py
@@ -10,13 +10,15 @@
 No unratify. Rescinding ratification or rejection requires writing a new
 decision that supersedes the previous one — clean audit trail, no rollback.
 """
+
 from __future__ import annotations
 
 import logging
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 
 from contracts import RatifyResponse
 from ledger.queries import decision_exists, project_decision_status
+
 # triage-adapt: dropped preflight_telemetry import from auto-merge — module
 # is on dev (#65 preflight telemetry) but not on triage; the cherry-picked
 # body doesn't actually reference it (intent of e6d4b8f for this file is
@@ -62,7 +64,11 @@ async def handle_ratify(
     )
     existing_signoff = (rows[0].get("signoff") if rows else None) or None
 
-    if existing_signoff and isinstance(existing_signoff, dict) and existing_signoff.get("state") == target_state:
+    if (
+        existing_signoff
+        and isinstance(existing_signoff, dict)
+        and existing_signoff.get("state") == target_state
+    ):
         projected = await project_decision_status(client, decision_id)
         return RatifyResponse(
             decision_id=decision_id,
@@ -73,7 +79,7 @@ async def handle_ratify(
 
     head_ref = getattr(ctx, "authoritative_sha", "") or ""
     session_id = getattr(ctx, "session_id", None) or ""
-    now_iso = datetime.now(timezone.utc).isoformat()
+    now_iso = datetime.now(UTC).isoformat()
 
     if action == "ratify":
         signoff = {
@@ -100,7 +106,10 @@ async def handle_ratify(
 
     logger.info(
         "[ratify] decision=%s action=%s signer=%s projected_status=%s",
-        decision_id, action, signer, projected,
+        decision_id,
+        action,
+        signer,
+        projected,
     )
 
     return RatifyResponse(
diff --git a/handlers/reset.py b/handlers/reset.py
index 1b3de739..2814ddb1 100644
--- a/handlers/reset.py
+++ b/handlers/reset.py
@@ -48,7 +48,11 @@ async def handle_reset(
     ledger = ctx.ledger
     if hasattr(ledger, "connect"):
         await ledger.connect()
-    if confirm and hasattr(ledger, "force_migrate") and getattr(ledger, "_pending_destructive", None):
+    if (
+        confirm
+        and hasattr(ledger, "force_migrate")
+        and getattr(ledger, "_pending_destructive", None)
+    ):
         await ledger.force_migrate()
 
     cursors = await _get_cursors(ledger, ctx.repo_path)
@@ -68,7 +72,11 @@ async def handle_reset(
 
     if not confirm:
         if wipe_mode == "full":
-            dir_desc = f" and the entire .bicameral/ directory at {bicameral_dir!r}" if bicameral_dir else ""
+            dir_desc = (
+                f" and the entire .bicameral/ directory at {bicameral_dir!r}"
+                if bicameral_dir
+                else ""
+            )
             next_action = (
                 f"DRY RUN — FULL WIPE. Would delete {cursors_before} source_cursor row(s), "
                 f"every bicameral node/edge scoped to {ctx.repo_path!r}{dir_desc}. "
@@ -95,6 +103,7 @@ async def handle_reset(
     # Invalidate within-call sync cache before any destructive operation.
     try:
         from handlers.link_commit import invalidate_sync_cache
+
         invalidate_sync_cache(ctx)
     except Exception:
         pass
@@ -123,7 +132,10 @@ async def handle_reset(
 
     logger.info(
         "[reset] wipe_mode=%s, wiped %d source_cursor(s) for repo=%s bicameral_dir=%r",
-        wipe_mode, cursors_before, ctx.repo_path, bicameral_dir,
+        wipe_mode,
+        cursors_before,
+        ctx.repo_path,
+        bicameral_dir,
     )
 
     if wipe_mode == "full":
@@ -165,15 +177,14 @@ async def _wipe_ledger(ledger, repo_path: str) -> None:
     inner = getattr(ledger, "_inner", ledger)
     client = getattr(inner, "_client", None)
     if client is None:
-        raise RuntimeError(
-            "reset: ledger adapter does not expose wipe_all_rows or an inner client"
-        )
+        raise RuntimeError("reset: ledger adapter does not expose wipe_all_rows or an inner client")
     import shutil
+
     url = getattr(inner, "_url", "")
     await client.close()
     inner._connected = False
     if url.startswith("surrealkv://"):
-        db_path = url[len("surrealkv://"):]
+        db_path = url[len("surrealkv://") :]
         if db_path:
             shutil.rmtree(db_path, ignore_errors=True)
     await inner._ensure_connected()
@@ -219,7 +230,7 @@ def _resolve_bicameral_dir(ledger) -> str:
             continue
         url = getattr(obj, "_url", "")
         if url.startswith("surrealkv://"):
-            db_path = url[len("surrealkv://"):]
+            db_path = url[len("surrealkv://") :]
             if db_path:
                 return str(Path(db_path).expanduser().parent)
     return ""
@@ -251,4 +262,5 @@ def _resolve_ledger_url(ctx, ledger) -> str:
         if v:
             return str(v)
     import os
+
     return os.environ.get("SURREAL_URL", "")
diff --git a/handlers/resolve_collision.py b/handlers/resolve_collision.py
index eb739b3f..57730514 100644
--- a/handlers/resolve_collision.py
+++ b/handlers/resolve_collision.py
@@ -21,7 +21,7 @@
 from __future__ import annotations
 
 import logging
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 
 from contracts import ResolveCollisionResponse
 from ledger.queries import (
@@ -39,7 +39,7 @@ async def handle_resolve_collision(
     # Collision mode params
     new_id: str | None = None,
     old_id: str | None = None,
-    action: str | None = None,       # 'supersede' | 'keep_both'
+    action: str | None = None,  # 'supersede' | 'keep_both'
     # Context-for mode params
     span_id: str | None = None,
     decision_id: str | None = None,
@@ -54,14 +54,16 @@ async def handle_resolve_collision(
     client = inner._client
 
     _session_id = getattr(ctx, "session_id", None) or ""
-    _now_iso = datetime.now(timezone.utc).isoformat()
+    _now_iso = datetime.now(UTC).isoformat()
 
     # ── Collision mode ────────────────────────────────────────────────────
     if action is not None:
         if not new_id or not old_id:
             raise ValueError("collision mode requires new_id and old_id")
         if action not in ("supersede", "keep_both", "link_parent"):
-            raise ValueError(f"action must be 'supersede', 'keep_both', or 'link_parent', got {action!r}")
+            raise ValueError(
+                f"action must be 'supersede', 'keep_both', or 'link_parent', got {action!r}"
+            )
 
         if not await decision_exists(client, new_id):
             raise ValueError(f"No decision row for new_id={new_id}")
@@ -85,9 +87,7 @@ async def handle_resolve_collision(
             )
             old_status = result.get("old_status", "superseded")
 
-            logger.info(
-                "[resolve_collision] supersede: %s supersedes %s", new_id, old_id
-            )
+            logger.info("[resolve_collision] supersede: %s supersedes %s", new_id, old_id)
 
         elif action == "link_parent":
             # Cross-level parent-child link: write parent_decision_id on the child (new_id).
@@ -116,9 +116,7 @@ async def handle_resolve_collision(
 
         else:  # keep_both
             old_status = ""
-            logger.info(
-                "[resolve_collision] keep_both: %s and %s both remain", new_id, old_id
-            )
+            logger.info("[resolve_collision] keep_both: %s and %s both remain", new_id, old_id)
 
         # Clear collision_pending on new decision so it enters normal flow
         _proposed_signoff = {
@@ -150,7 +148,9 @@ async def handle_resolve_collision(
 
         state = "confirmed" if confirmed else "rejected"
         await relate_context_for(
-            client, span_id, decision_id,
+            client,
+            span_id,
+            decision_id,
             state=state,
             relevance_score=0.0,
             reason=f"human-{state} via resolve_collision session={_session_id}",
@@ -158,7 +158,9 @@ async def handle_resolve_collision(
 
         logger.info(
             "[resolve_collision] context_for: span=%s decision=%s state=%s",
-            span_id, decision_id, state,
+            span_id,
+            decision_id,
+            state,
         )
 
         return ResolveCollisionResponse(
diff --git a/handlers/resolve_compliance.py b/handlers/resolve_compliance.py
index 7e16beae..cfac4439 100644
--- a/handlers/resolve_compliance.py
+++ b/handlers/resolve_compliance.py
@@ -21,10 +21,11 @@
 A missing or mismatched flow_id logs a warning (stale/orphaned call). This
 will become a hard error once the codebase fully migrates to flow_id usage.
 """
+
 from __future__ import annotations
 
 import logging
-from typing import Iterable
+from collections.abc import Iterable
 
 from contracts import (
     ComplianceVerdict,
@@ -80,9 +81,7 @@ async def handle_resolve_compliance(
     last-verdict-wins caveat from v0.4.x).
     """
     if phase not in _VALID_PHASES:
-        raise ValueError(
-            f"Unknown phase {phase!r} — must be one of {sorted(_VALID_PHASES)}"
-        )
+        raise ValueError(f"Unknown phase {phase!r} — must be one of {sorted(_VALID_PHASES)}")
 
     sync_state = getattr(ctx, "_sync_state", None)
     is_ephemeral = False
@@ -92,7 +91,8 @@ async def handle_resolve_compliance(
             logger.warning(
                 "[resolve_compliance] flow_id mismatch: expected %s, got %s — "
                 "verdicts may be stale or from a different link_commit call",
-                expected_flow_id[:8], (flow_id or "missing")[:8],
+                expected_flow_id[:8],
+                (flow_id or "missing")[:8],
             )
         elif expected_flow_id and not flow_id:
             logger.warning(
@@ -117,21 +117,25 @@ async def handle_resolve_compliance(
 
     for v in parsed:
         if not await decision_exists(client, v.decision_id):
-            rejected.append(ResolveComplianceRejection(
-                decision_id=v.decision_id,
-                region_id=v.region_id,
-                reason="unknown_decision_id",
-                detail=f"no decision row for {v.decision_id}",
-            ))
+            rejected.append(
+                ResolveComplianceRejection(
+                    decision_id=v.decision_id,
+                    region_id=v.region_id,
+                    reason="unknown_decision_id",
+                    detail=f"no decision row for {v.decision_id}",
+                )
+            )
             continue
 
         if not await region_exists(client, v.region_id):
-            rejected.append(ResolveComplianceRejection(
-                decision_id=v.decision_id,
-                region_id=v.region_id,
-                reason="unknown_region_id",
-                detail=f"no code_region row for {v.region_id}",
-            ))
+            rejected.append(
+                ResolveComplianceRejection(
+                    decision_id=v.decision_id,
+                    region_id=v.region_id,
+                    reason="unknown_region_id",
+                    detail=f"no code_region row for {v.region_id}",
+                )
+            )
             continue
 
         is_pruned = v.verdict == "not_relevant"
@@ -145,7 +149,8 @@ async def handle_resolve_compliance(
             except Exception as exc:
                 logger.warning(
                     "[resolve_compliance] promote_ephemeral_verdict failed for %s: %s",
-                    v.decision_id, exc,
+                    v.decision_id,
+                    exc,
                 )
 
         await upsert_compliance_check(
@@ -169,12 +174,14 @@ async def handle_resolve_compliance(
 
         affected_decision_ids.add(v.decision_id)
 
-        accepted.append(ResolveComplianceAccepted(
-            decision_id=v.decision_id,
-            region_id=v.region_id,
-            phase=phase,
-            verdict=v.verdict,
-        ))
+        accepted.append(
+            ResolveComplianceAccepted(
+                decision_id=v.decision_id,
+                region_id=v.region_id,
+                phase=phase,
+                verdict=v.verdict,
+            )
+        )
 
     # Sync code_region.content_hash to the verdict hash for every accepted verdict.
     # project_decision_status looks up verdicts by (decision_id, region_id,
@@ -187,7 +194,9 @@ async def handle_resolve_compliance(
         try:
             await update_region_hash(client, v.region_id, v.content_hash)
         except Exception as exc:
-            logger.warning("[resolve_compliance] update_region_hash failed for %s: %s", v.region_id, exc)
+            logger.warning(
+                "[resolve_compliance] update_region_hash failed for %s: %s", v.region_id, exc
+            )
 
     # v0.5.0: holistic status projection after the full batch is written.
     # Replaces the per-verdict last-verdict-wins update from v0.4.x.
@@ -197,11 +206,15 @@ async def handle_resolve_compliance(
 
     logger.info(
         "[resolve_compliance] phase=%s accepted=%d rejected=%d commit=%s",
-        phase, len(accepted), len(rejected), (commit_hash or "")[:8] or "n/a",
+        phase,
+        len(accepted),
+        len(rejected),
+        (commit_hash or "")[:8] or "n/a",
     )
 
     try:
         from dashboard.server import notify_dashboard
+
         await notify_dashboard(ctx)
     except Exception:
         pass
diff --git a/handlers/search_decisions.py b/handlers/search_decisions.py
index c85d3e13..8913e5b6 100644
--- a/handlers/search_decisions.py
+++ b/handlers/search_decisions.py
@@ -8,7 +8,13 @@
 
 import time
 
-from contracts import CodeRegionSummary, DecisionMatch, LinkCommitResponse, SearchDecisionsResponse, SyncMetrics
+from contracts import (
+    CodeRegionSummary,
+    DecisionMatch,
+    LinkCommitResponse,
+    SearchDecisionsResponse,
+    SyncMetrics,
+)
 from handlers.action_hints import generate_hints_for_search
 from handlers.link_commit import handle_link_commit
 
@@ -29,7 +35,9 @@ async def handle_search_decisions(
     sync_status: LinkCommitResponse = await handle_link_commit(ctx, "HEAD")
     catchup_ms = round((time.perf_counter() - t0) * 1000, 3)
 
-    raw_matches = await ctx.ledger.search_by_query(query, max_results=max_results, min_confidence=min_confidence)
+    raw_matches = await ctx.ledger.search_by_query(
+        query, max_results=max_results, min_confidence=min_confidence
+    )
 
     matches: list[DecisionMatch] = []
     suggested_review: list[str] = []
@@ -58,20 +66,22 @@ async def handle_search_decisions(
             suggested_review.append(m["decision_id"])
 
         _signoff = m.get("signoff") or {}
-        matches.append(DecisionMatch(
-            decision_id=m["decision_id"],
-            description=m["description"],
-            status=status,
-            signoff_state=(_signoff.get("state") if isinstance(_signoff, dict) else None),
-            confidence=m.get("confidence", 0.5),
-            source_ref=m.get("source_ref", ""),
-            code_regions=regions,
-            drift_evidence=m.get("drift_evidence", ""),
-            related_constraints=m.get("related_constraints", []),
-            source_excerpt=m.get("source_excerpt", ""),
-            meeting_date=m.get("meeting_date", ""),
-            signoff=m.get("signoff"),
-        ))
+        matches.append(
+            DecisionMatch(
+                decision_id=m["decision_id"],
+                description=m["description"],
+                status=status,
+                signoff_state=(_signoff.get("state") if isinstance(_signoff, dict) else None),
+                confidence=m.get("confidence", 0.5),
+                source_ref=m.get("source_ref", ""),
+                code_regions=regions,
+                drift_evidence=m.get("drift_evidence", ""),
+                related_constraints=m.get("related_constraints", []),
+                source_excerpt=m.get("source_excerpt", ""),
+                meeting_date=m.get("meeting_date", ""),
+                signoff=m.get("signoff"),
+            )
+        )
 
     ungrounded_count = sum(1 for m in matches if m.status == "ungrounded")
 
@@ -83,7 +93,8 @@ async def handle_search_decisions(
         suggested_review=suggested_review,
     )
     response.action_hints = generate_hints_for_search(
-        response, guided_mode=getattr(ctx, "guided_mode", False),
+        response,
+        guided_mode=getattr(ctx, "guided_mode", False),
     )
     response.sync_metrics = SyncMetrics(sync_catchup_ms=catchup_ms)
     return response
diff --git a/handlers/sync_middleware.py b/handlers/sync_middleware.py
index 9d582b41..52e376d5 100644
--- a/handlers/sync_middleware.py
+++ b/handlers/sync_middleware.py
@@ -17,7 +17,7 @@
 import logging
 import time
 from contextlib import asynccontextmanager
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
@@ -30,7 +30,6 @@
 _LAST_SYNCED_SHA: str | None = None
 
 
-
 # ── V1 A2-light: per-repo write barrier ─────────────────────────────────
 # Module-level registry of per-repo asyncio.Locks. Serializes mutating
 # handlers against the same repo inside a single MCP server process.
@@ -95,6 +94,7 @@ class BarrierTiming:
     Handlers read it after the ``async with`` block to attach the number
     to their ``SyncMetrics`` response field.
     """
+
     __slots__ = ("held_ms",)
 
     def __init__(self) -> None:
@@ -129,7 +129,7 @@ def _reset_repo_locks_for_tests() -> None:
 _BANNER_MAX_ITEMS = 10
 
 
-async def get_session_start_banner(ctx) -> "SessionStartBanner | None":
+async def get_session_start_banner(ctx) -> SessionStartBanner | None:
     """Return open-decision summary for session start, or None if nothing actionable.
 
     Fires exactly once per session (keyed on ctx._sync_state["session_started"]).
@@ -150,17 +150,14 @@ async def get_session_start_banner(ctx) -> "SessionStartBanner | None":
     except Exception:
         return None
 
-    now = datetime.now(timezone.utc)
+    now = datetime.now(UTC)
 
     drifted_rows = [r for r in rows if r.get("status") == "drifted"]
-    proposal_rows = [
-        r for r in rows
-        if (r.get("signoff") or {}).get("state") == "proposed"
-    ]
+    proposal_rows = [r for r in rows if (r.get("signoff") or {}).get("state") == "proposed"]
     real_ungrounded_rows = [
-        r for r in rows
-        if r.get("status") == "ungrounded"
-        and (r.get("signoff") or {}).get("state") != "proposed"
+        r
+        for r in rows
+        if r.get("status") == "ungrounded" and (r.get("signoff") or {}).get("state") != "proposed"
     ]
 
     stale_proposals = []
@@ -191,13 +188,15 @@ async def get_session_start_banner(ctx) -> "SessionStartBanner | None":
     items = []
     for r in visible:
         signoff = r.get("signoff") or {}
-        items.append({
-            "decision_id": r.get("decision_id", r.get("id", "")),
-            "description": r.get("description", ""),
-            "status": r.get("status", ""),
-            "signoff_state": signoff.get("state"),
-            "source_ref": r.get("source_ref", ""),
-        })
+        items.append(
+            {
+                "decision_id": r.get("decision_id", r.get("id", "")),
+                "description": r.get("description", ""),
+                "status": r.get("status", ""),
+                "signoff_state": signoff.get("state"),
+                "source_ref": r.get("source_ref", ""),
+            }
+        )
 
     parts = []
     if drifted_count:
@@ -222,7 +221,7 @@ async def get_session_start_banner(ctx) -> "SessionStartBanner | None":
     )
 
 
-async def ensure_ledger_synced(ctx) -> "LinkCommitResponse | None":
+async def ensure_ledger_synced(ctx) -> LinkCommitResponse | None:
     """Sync ledger to HEAD if it has moved since the last sync in this process.
 
     Returns the LinkCommitResponse when a new commit was processed — callers
@@ -232,7 +231,8 @@ async def ensure_ledger_synced(ctx) -> "LinkCommitResponse | None":
     global _LAST_SYNCED_SHA
 
     try:
-        from handlers.link_commit import handle_link_commit, _read_current_head_sha
+        from handlers.link_commit import _read_current_head_sha, handle_link_commit
+
         live_head = _read_current_head_sha(getattr(ctx, "repo_path", "") or ".")
         if live_head and live_head != _LAST_SYNCED_SHA:
             result = await handle_link_commit(ctx, "HEAD")
diff --git a/handlers/update.py b/handlers/update.py
index 229c755f..a743b7e2 100644
--- a/handlers/update.py
+++ b/handlers/update.py
@@ -17,7 +17,7 @@
 import sys
 import time
 import urllib.request
-from typing import Optional
+from pathlib import Path
 
 logger = logging.getLogger(__name__)
 
@@ -45,7 +45,7 @@ def _save_cache(data: dict) -> None:
         pass
 
 
-def _fetch_recommended_version() -> Optional[str]:
+def _fetch_recommended_version() -> str | None:
     """Fetch RECOMMENDED_VERSION from GitHub with a 1-hour cache."""
     cache = _load_cache()
     now = time.time()
@@ -84,7 +84,7 @@ def get_update_notice(current_version: str) -> dict | None:
         "action_required": (
             f"Ask the user: 'bicameral-mcp v{recommended} is available "
             f"(you are on v{current_version}) — upgrade now? (yes/no)'. "
-            "If yes, call bicameral.update {\"action\": \"apply\"}."
+            'If yes, call bicameral.update {"action": "apply"}.'
         ),
     }
 
@@ -134,12 +134,12 @@ def _apply_pending_migration(repo_path: str) -> dict:
       replay_plan: list[dict]     (only when migrated=True)
       error: str                  (only on failure)
     """
-    import tempfile, os
+    import os
+    import tempfile
+
     tmp = None
     try:
-        with tempfile.NamedTemporaryFile(
-            mode="w", suffix=".py", delete=False
-        ) as f:
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
             f.write(_MIGRATION_SCRIPT)
             tmp = f.name
         result = subprocess.run(
@@ -167,6 +167,7 @@ def _read_guided_from_config(repo_path: str) -> bool:
     """Return the guided: flag from .bicameral/config.yaml, defaulting to False."""
     try:
         import re
+
         config_path = Path(repo_path) / ".bicameral" / "config.yaml"
         if not config_path.exists():
             return False
@@ -191,7 +192,7 @@ def _reinstall_skills(repo_path: str) -> int:
             f"rp = Path(r'{repo_path}'); "
             f"n = _install_skills(rp); "
             f"_install_claude_hooks(rp); "
-            + (f"_install_git_post_commit_hook(rp); " if guided else "")
+            + ("_install_git_post_commit_hook(rp); " if guided else "")
             + "print(n)"
         )
         result = subprocess.run(
@@ -249,6 +250,7 @@ async def handle_update(action: str, current_version: str, repo_path: str = "")
             # and handles externally-managed-environment restrictions on macOS.
             # Fall back to pip for venv/dev installs.
             import shutil
+
             if shutil.which("pipx"):
                 cmd = ["pipx", "install", target, "--force"]
             else:
@@ -270,7 +272,9 @@ async def handle_update(action: str, current_version: str, repo_path: str = "")
                 )
 
                 # Auto-apply any pending destructive migration using the new binary.
-                migration_result = _apply_pending_migration(repo_path) if repo_path else {"migrated": False}
+                migration_result = (
+                    _apply_pending_migration(repo_path) if repo_path else {"migrated": False}
+                )
                 if migration_result.get("migrated"):
                     cursors_wiped = migration_result.get("cursors_wiped", 0)
                     replay_plan = migration_result.get("replay_plan", [])
diff --git a/handlers/usage_summary.py b/handlers/usage_summary.py
index c3ddd69a..8d0bbfb9 100644
--- a/handlers/usage_summary.py
+++ b/handlers/usage_summary.py
@@ -11,7 +11,7 @@
 from __future__ import annotations
 
 import logging
-from datetime import datetime, timedelta, timezone
+from datetime import UTC, datetime, timedelta
 
 from local_counters import read_counters
 
@@ -54,7 +54,7 @@ async def handle_usage_summary(ctx, days: int = 7) -> dict:
 
     try:
         ledger = ctx.ledger
-        cutoff = (datetime.now(timezone.utc) - timedelta(days=period_days)).isoformat()
+        cutoff = (datetime.now(UTC) - timedelta(days=period_days)).isoformat()
         client = getattr(getattr(ledger, "_inner", ledger), "_client", None)
         if client is None:
             return base
@@ -89,9 +89,7 @@ async def handle_usage_summary(ctx, days: int = 7) -> dict:
                 f"WHERE checked_at > <datetime>'{cutoff}' "
                 "AND verdict IN ['drifted', 'cosmetic_autopass'] GROUP BY verdict"
             )
-            cc_counts = {
-                r.get("verdict"): int(r.get("n", 0)) for r in (cc_rows or [])
-            }
+            cc_counts = {r.get("verdict"): int(r.get("n", 0)) for r in (cc_rows or [])}
             cosmetic = cc_counts.get("cosmetic_autopass", 0)
             drift_total = cosmetic + cc_counts.get("drifted", 0)
             if drift_total > 0:
diff --git a/ledger/__init__.py b/ledger/__init__.py
index de51d781..0217c078 100644
--- a/ledger/__init__.py
+++ b/ledger/__init__.py
@@ -1,4 +1,5 @@
 """Decision Ledger — SurrealDB-backed implementation for Phase 2."""
+
 from .adapter import SurrealDBLedgerAdapter
 from .client import LedgerClient
 
diff --git a/ledger/adapter.py b/ledger/adapter.py
index bee2b755..ab26eb5e 100644
--- a/ledger/adapter.py
+++ b/ledger/adapter.py
@@ -16,11 +16,10 @@
 from .client import LedgerClient
 from .queries import (
     decision_exists,
-    delete_binds_to_edge,
     find_subject_identities_for_decision,
     get_all_decisions,
-    get_decision_level,
     get_compliance_verdict,
+    get_decision_level,
     get_decisions_for_file,
     get_decisions_for_files,
     get_pending_decisions_with_regions,
@@ -35,7 +34,6 @@
     lookup_vocab_cache,
     project_decision_status,
     promote_ephemeral_verdict,
-    region_exists,
     relate_binds_to,
     relate_has_identity,
     relate_locates,
@@ -65,7 +63,6 @@
     resolve_ref,
 )
 
-
 _CODE_BODY_LINE_CAP = 200
 
 
@@ -100,6 +97,7 @@ def _get_branch_delta_files(authoritative_ref: str, commit_hash: str, repo_path:
     Returns [] if the command fails or authoritative_ref is unreachable.
     """
     import subprocess as _sp
+
     try:
         result = _sp.run(
             ["git", "diff", f"{authoritative_ref}...{commit_hash}", "--name-only"],
@@ -206,7 +204,6 @@ async def search_by_query(
 
     async def decision_exists(self, decision_id: str) -> bool:
         await self._ensure_connected()
-        from .queries import decision_exists
         return await decision_exists(self._client, decision_id)
 
     async def get_decision_description(self, decision_id: str) -> str:
@@ -254,7 +251,9 @@ async def bind_decision(
             raise ValueError(f"upsert_code_region returned empty id for {file_path}:{symbol_name}")
 
         await relate_binds_to(
-            self._client, decision_id, region_id,
+            self._client,
+            decision_id,
+            region_id,
             confidence=0.95,
             provenance={"method": "caller_llm"},
         )
@@ -307,7 +306,10 @@ async def relate_has_identity(
     ) -> None:
         await self._ensure_connected()
         await relate_has_identity(
-            self._client, code_subject_id, subject_identity_id, confidence=confidence,
+            self._client,
+            code_subject_id,
+            subject_identity_id,
+            confidence=confidence,
         )
 
     async def link_decision_to_subject(
@@ -318,7 +320,10 @@ async def link_decision_to_subject(
     ) -> None:
         await self._ensure_connected()
         await link_decision_to_subject(
-            self._client, decision_id, code_subject_id, confidence=confidence,
+            self._client,
+            decision_id,
+            code_subject_id,
+            confidence=confidence,
         )
 
     async def find_subject_identities_for_decision(
@@ -392,6 +397,7 @@ async def ingest_commit(
 
         if drift_analyzer is None:
             from .drift import HashDriftAnalyzer
+
             drift_analyzer = HashDriftAnalyzer()
 
         if commit_hash == "HEAD":
@@ -402,6 +408,7 @@ async def ingest_commit(
         is_authoritative = True
         if authoritative_ref:
             import subprocess
+
             try:
                 result = subprocess.run(
                     ["git", "rev-parse", "--abbrev-ref", "HEAD"],
@@ -418,7 +425,8 @@ async def ingest_commit(
                 logger.info(
                     "[link_commit] current branch %s != authoritative %s — "
                     "running in read-only mode (no baseline writes)",
-                    current_branch, authoritative_ref,
+                    current_branch,
+                    authoritative_ref,
                 )
 
         state = await get_sync_state(self._client, repo_path)
@@ -440,18 +448,22 @@ async def ingest_commit(
                     if not current_hash:
                         continue
                     code_body = _extract_code_body(fp, sl, el, repo_path, ref=commit_hash)
-                    pending_checks.append({
-                        "phase": "ingest",
-                        "decision_id": str(row.get("decision_id", "")),
-                        "region_id": region_id,
-                        "decision_description": str(row.get("description", "")),
-                        "file_path": fp,
-                        "symbol": row.get("symbol_name", ""),
-                        "content_hash": current_hash,
-                        "code_body": code_body,
-                    })
+                    pending_checks.append(
+                        {
+                            "phase": "ingest",
+                            "decision_id": str(row.get("decision_id", "")),
+                            "region_id": region_id,
+                            "decision_description": str(row.get("description", "")),
+                            "file_path": fp,
+                            "symbol": row.get("symbol_name", ""),
+                            "content_hash": current_hash,
+                            "code_body": code_body,
+                        }
+                    )
             except Exception as exc:
-                logger.warning("[link_commit] could not surface pending decisions on already_synced: %s", exc)
+                logger.warning(
+                    "[link_commit] could not surface pending decisions on already_synced: %s", exc
+                )
 
             # Repair stale ephemeral hashes on authoritative branches.
             # A feature-branch bind sets code_region.content_hash = H_branch.
@@ -480,7 +492,7 @@ async def ingest_commit(
                             continue
                         await update_region_hash(self._client, region_id, actual_hash, commit_hash)
                         regions_repaired += 1
-                        for decision in (region.get("decisions") or []):
+                        for decision in region.get("decisions") or []:
                             if decision is None:
                                 continue
                             decision_id = str(decision.get("id", ""))
@@ -520,7 +532,8 @@ async def ingest_commit(
             if range_files is None:
                 logger.warning(
                     "[link_commit] range %s..%s unreachable, falling back to head-only sweep",
-                    last_synced[:8], commit_hash[:8],
+                    last_synced[:8],
+                    commit_hash[:8],
                 )
                 changed_files = get_changed_files(commit_hash, repo_path)
                 sweep_scope = "head_only"
@@ -530,7 +543,8 @@ async def ingest_commit(
                 if len(changed_files) > _MAX_SWEEP_FILES:
                     logger.warning(
                         "[link_commit] range sweep capped at %d files (would have swept %d).",
-                        _MAX_SWEEP_FILES, len(changed_files),
+                        _MAX_SWEEP_FILES,
+                        len(changed_files),
                     )
                     changed_files = changed_files[:_MAX_SWEEP_FILES]
                     sweep_scope = "range_truncated"
@@ -613,10 +627,13 @@ async def ingest_commit(
             if is_authoritative:
                 await update_region_hash(self._client, region_id, actual_hash, commit_hash)
                 from .status import resolve_symbol_lines
+
                 resolved = resolve_symbol_lines(file_path, symbol_name, repo_path, ref=commit_hash)
                 if resolved is None:
                     symbol_disappeared = True
-                elif resolved[0] != region.get("start_line") or resolved[1] != region.get("end_line"):
+                elif resolved[0] != region.get("start_line") or resolved[1] != region.get(
+                    "end_line"
+                ):
                     await self._client.query(
                         f"UPDATE {region_id} SET start_line = $sl, end_line = $el",
                         {"sl": resolved[0], "el": resolved[1]},
@@ -627,7 +644,7 @@ async def ingest_commit(
             phase = "ingest" if not stored_hash else "drift"
 
             # v0.5.0: decisions are accessed via binds_to (renamed from intents via maps_to)
-            for decision in (region.get("decisions") or []):
+            for decision in region.get("decisions") or []:
                 if decision is None:
                     continue
                 decision_id = str(decision.get("id", ""))
@@ -654,20 +671,25 @@ async def ingest_commit(
                 if symbol_disappeared:
                     # L1 decisions are intentionally ungrounded — skip grounding alarm.
                     if decision.get("decision_level") != "L1":
-                        pending_grounding_checks.append({
-                            "decision_id": decision_id,
-                            "description": str(decision.get("description", "")),
-                            "reason": "symbol_disappeared",
-                            "file_path": file_path,
-                            "symbol": symbol_name,
-                            "original_lines": [start_line, end_line],
-                        })
+                        pending_grounding_checks.append(
+                            {
+                                "decision_id": decision_id,
+                                "description": str(decision.get("description", "")),
+                                "reason": "symbol_disappeared",
+                                "file_path": file_path,
+                                "symbol": symbol_name,
+                                "original_lines": [start_line, end_line],
+                            }
+                        )
                     continue
 
                 verdict: dict | None = None
                 if actual_hash:
                     verdict = await get_compliance_verdict(
-                        self._client, decision_id, region_id, actual_hash,
+                        self._client,
+                        decision_id,
+                        region_id,
+                        actual_hash,
                     )
 
                 new_status = derive_status(stored_hash, actual_hash, cached_verdict=verdict)
@@ -675,7 +697,9 @@ async def ingest_commit(
                 if is_authoritative:
                     # V2: promote ephemeral verdict when same hash lands on authoritative branch
                     if actual_hash:
-                        await promote_ephemeral_verdict(self._client, decision_id, region_id, actual_hash)
+                        await promote_ephemeral_verdict(
+                            self._client, decision_id, region_id, actual_hash
+                        )
                     # v0.5.0: holistic status projection from DB
                     projected = await project_decision_status(self._client, decision_id)
                     await update_decision_status(self._client, decision_id, projected)
@@ -689,8 +713,12 @@ async def ingest_commit(
                         fb_status = "pending"
                     elif actual_hash == stored_hash:
                         if verdict is not None and not verdict.get("pruned"):
-                            fb_status = "reflected" if verdict.get("verdict") == "compliant" else "drifted"
-                        elif await has_prior_compliant_verdict(self._client, decision_id, region_id):
+                            fb_status = (
+                                "reflected" if verdict.get("verdict") == "compliant" else "drifted"
+                            )
+                        elif await has_prior_compliant_verdict(
+                            self._client, decision_id, region_id
+                        ):
                             fb_status = "drifted"
                         else:
                             fb_status = "pending"
@@ -710,18 +738,24 @@ async def ingest_commit(
                 if actual_hash and verdict is None:
                     if region_code_body is None:
                         region_code_body = _extract_code_body(
-                            file_path, start_line, end_line, repo_path, ref=commit_hash,
+                            file_path,
+                            start_line,
+                            end_line,
+                            repo_path,
+                            ref=commit_hash,
                         )
-                    pending_checks.append({
-                        "phase": phase,
-                        "decision_id": decision_id,
-                        "region_id": region_id,
-                        "decision_description": str(decision.get("description", "")),
-                        "file_path": file_path,
-                        "symbol": symbol_name,
-                        "content_hash": actual_hash,
-                        "code_body": region_code_body,
-                    })
+                    pending_checks.append(
+                        {
+                            "phase": phase,
+                            "decision_id": decision_id,
+                            "region_id": region_id,
+                            "decision_description": str(decision.get("description", "")),
+                            "file_path": file_path,
+                            "symbol": symbol_name,
+                            "content_hash": actual_hash,
+                            "code_body": region_code_body,
+                        }
+                    )
 
             decisions = [i for i in (region.get("decisions") or []) if i is not None]
             if not decisions and symbol_name:
@@ -740,11 +774,13 @@ async def ingest_commit(
                 # `d["id"]` returns "" and produces unusable grounding
                 # checks the caller cannot bind against. Surfaced by V1 F1
                 # regression coverage.
-                pending_grounding_checks.append({
-                    "decision_id": str(d.get("decision_id") or d.get("id", "")),
-                    "description": str(d.get("description", "")),
-                    "reason": "ungrounded",
-                })
+                pending_grounding_checks.append(
+                    {
+                        "decision_id": str(d.get("decision_id") or d.get("id", "")),
+                        "description": str(d.get("description", "")),
+                        "reason": "ungrounded",
+                    }
+                )
         except Exception as exc:
             logger.warning("[link_commit] could not query ungrounded decisions: %s", exc)
 
@@ -765,16 +801,18 @@ async def ingest_commit(
                 if not current_hash:
                     continue
                 code_body = _extract_code_body(fp, sl, el, repo_path, ref=commit_hash)
-                pending_checks.append({
-                    "phase": "drift",
-                    "decision_id": str(row.get("decision_id", "")),
-                    "region_id": region_id,
-                    "decision_description": str(row.get("description", "")),
-                    "file_path": fp,
-                    "symbol": row.get("symbol_name", ""),
-                    "content_hash": current_hash,
-                    "code_body": code_body,
-                })
+                pending_checks.append(
+                    {
+                        "phase": "drift",
+                        "decision_id": str(row.get("decision_id", "")),
+                        "region_id": region_id,
+                        "decision_description": str(row.get("description", "")),
+                        "file_path": fp,
+                        "symbol": row.get("symbol_name", ""),
+                        "content_hash": current_hash,
+                        "code_body": code_body,
+                    }
+                )
         except Exception as exc:
             logger.warning("[link_commit] could not surface stale pending decisions: %s", exc)
 
@@ -805,6 +843,7 @@ async def backfill_empty_hashes(
 
         if drift_analyzer is None:
             from .drift import HashDriftAnalyzer
+
             drift_analyzer = HashDriftAnalyzer()
 
         legacy = await get_regions_without_hash(self._client, repo=repo_path)
@@ -842,7 +881,7 @@ async def backfill_empty_hashes(
 
             await update_region_hash(self._client, region_id, drift_result.content_hash, ref)
             new_status = drift_result.status
-            for decision in (region.get("decisions") or []):
+            for decision in region.get("decisions") or []:
                 if decision is None:
                     continue
                 decision_id = str(decision.get("id", ""))
@@ -971,10 +1010,9 @@ async def ingest_payload(self, payload: dict, ctx=None) -> dict:
                     # contexts — fall through with empty hash so the decision
                     # is created as ungrounded (matches pre-v0.10.7 behavior).
                     repo_on_disk = Path(repo).resolve().is_dir()
-                    ref_resolves = (
-                        repo_on_disk
-                        and (effective_ref == "working_tree"
-                             or resolve_ref(effective_ref, repo) is not None)
+                    ref_resolves = repo_on_disk and (
+                        effective_ref == "working_tree"
+                        or resolve_ref(effective_ref, repo) is not None
                     )
                     if repo_on_disk and ref_resolves:
                         _computed = compute_content_hash(
@@ -984,7 +1022,9 @@ async def ingest_payload(self, payload: dict, ctx=None) -> dict:
                             logger.warning(
                                 "[ingest] skipping region: file '%s' not found at %s in %s"
                                 " — only bind to existing code, never hypothetical files",
-                                file_path, effective_ref, repo,
+                                file_path,
+                                effective_ref,
+                                repo,
                             )
                             continue
                         content_hash = _computed
@@ -1025,7 +1065,9 @@ async def ingest_payload(self, payload: dict, ctx=None) -> dict:
                     provenance["grounding_tier"] = grounding_tier
                     provenance["method"] = "auto_ground"
                 await relate_binds_to(
-                    self._client, decision_id, region_id,
+                    self._client,
+                    decision_id,
+                    region_id,
                     confidence=region_data.get("confidence", 0.8),
                     provenance=provenance,
                 )
@@ -1107,12 +1149,13 @@ async def wipe_all_rows(self, repo: str) -> None:
         immediately ready for use after this call returns.
         """
         import shutil
+
         await self._ensure_connected()
         await self._client.close()
         self._connected = False
         url = self._url
         if url.startswith("surrealkv://"):
-            db_path = url[len("surrealkv://"):]
+            db_path = url[len("surrealkv://") :]
             if db_path:
                 shutil.rmtree(db_path, ignore_errors=True)
         await self._ensure_connected()
@@ -1156,9 +1199,7 @@ async def apply_supersede(
             new_id,
             old_id,
             confidence=1.0,
-            reason=(
-                f"human-confirmed supersession via resolve_collision session={session_id}"
-            ),
+            reason=(f"human-confirmed supersession via resolve_collision session={session_id}"),
         )
         rows = await self._client.query(f"SELECT signoff FROM {old_id} LIMIT 1")
         old_signoff: dict = {}
diff --git a/ledger/ast_diff.py b/ledger/ast_diff.py
index e452fad8..ac4e90e7 100644
--- a/ledger/ast_diff.py
+++ b/ledger/ast_diff.py
@@ -41,18 +41,20 @@
 # Languages B1 actually classifies. Anything else returns False (fail-safe).
 # Matches the set wired into code_locator/indexing/symbol_extractor.py so
 # the cosmetic detector never silently diverges from the indexer.
-SUPPORTED_LANGUAGES: frozenset[str] = frozenset({
-    "python",
-    "javascript",
-    "typescript",
-    "java",
-    "go",
-    "rust",
-    "c_sharp",
-    # via LANGUAGE_FALLBACK
-    "jsx",
-    "tsx",
-})
+SUPPORTED_LANGUAGES: frozenset[str] = frozenset(
+    {
+        "python",
+        "javascript",
+        "typescript",
+        "java",
+        "go",
+        "rust",
+        "c_sharp",
+        # via LANGUAGE_FALLBACK
+        "jsx",
+        "tsx",
+    }
+)
 
 
 def is_cosmetic_change(before: str, after: str, lang: str) -> bool:
@@ -93,8 +95,9 @@ def is_cosmetic_change(before: str, after: str, lang: str) -> bool:
         # If either input doesn't parse cleanly, refuse to call it cosmetic.
         if tree_before.root_node.has_error or tree_after.root_node.has_error:
             return False
-        return _signature(tree_before.root_node, before_bytes) == \
-               _signature(tree_after.root_node, after_bytes)
+        return _signature(tree_before.root_node, before_bytes) == _signature(
+            tree_after.root_node, after_bytes
+        )
     except (Exception, RecursionError) as exc:
         logger.debug("[ast_diff] classifier failed for %s: %s", normalized, exc)
         return False
@@ -114,7 +117,7 @@ def _signature(node: Any, source: bytes) -> tuple:
     produces a signature mismatch.
     """
     if node.child_count == 0:
-        return (node.type, source[node.start_byte:node.end_byte])
+        return (node.type, source[node.start_byte : node.end_byte])
     return (
         node.type,
         tuple(_signature(child, source) for child in node.children),
diff --git a/ledger/canonical.py b/ledger/canonical.py
index 67d9e8b5..e05bad85 100644
--- a/ledger/canonical.py
+++ b/ledger/canonical.py
@@ -42,8 +42,7 @@
 import json
 import re
 import unicodedata
-from uuid import NAMESPACE_URL, UUID, uuid5
-
+from uuid import NAMESPACE_URL, uuid5
 
 # Stable namespace UUID for bicameral canonical IDs. Derived from a
 # bicameral-specific URL via UUIDv5(NAMESPACE_URL, "https://bicameral.dev/v0.4.13/canonical").
diff --git a/ledger/client.py b/ledger/client.py
index 54b7c26d..7852d751 100644
--- a/ledger/client.py
+++ b/ledger/client.py
@@ -11,6 +11,7 @@
 from typing import Any
 
 from surrealdb import AsyncSurreal, RecordID
+
 try:
     from surrealdb import SurrealError
 except ImportError:
diff --git a/ledger/drift.py b/ledger/drift.py
index 6adfae07..7d2e32eb 100644
--- a/ledger/drift.py
+++ b/ledger/drift.py
@@ -11,6 +11,7 @@
 from __future__ import annotations
 
 from ports import DriftResult
+
 from .status import compute_content_hash, derive_status, resolve_symbol_lines
 
 
@@ -38,9 +39,7 @@ async def analyze_region(
             start_line, end_line = resolved
 
         # Compute actual hash at this ref
-        actual_hash = compute_content_hash(
-            file_path, start_line, end_line, repo_path, ref=ref
-        )
+        actual_hash = compute_content_hash(file_path, start_line, end_line, repo_path, ref=ref)
 
         # Self-heal legacy regions that were persisted before v0.4.5's
         # baseline-stamping fix. If we have no stored hash but the code
diff --git a/ledger/queries.py b/ledger/queries.py
index 42d02276..5c1bc72d 100644
--- a/ledger/queries.py
+++ b/ledger/queries.py
@@ -11,7 +11,7 @@
 from __future__ import annotations
 
 import logging
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 
 from .client import LedgerClient, LedgerError
 
@@ -25,6 +25,7 @@
 # Team-mode event replay re-issues every RELATE; duplicates are rejected by the
 # DB and treated as a no-op success here.
 
+
 async def _execute_idempotent_edge(
     client: LedgerClient, sql: str, vars: dict | None = None
 ) -> None:
@@ -132,7 +133,7 @@ async def upsert_source_cursor(
         "source_scope": source_scope,
         "cursor": cursor,
         "last_source_ref": last_source_ref,
-        "synced_at": str(datetime.now(timezone.utc).isoformat()),
+        "synced_at": str(datetime.now(UTC).isoformat()),
         "status": status,
         "error": error,
     }
@@ -193,16 +194,13 @@ async def get_all_decisions(
         ca = row.pop("created_at", None)
         row.setdefault("ingested_at", str(ca)[:24] if ca else "")
     for row in rows:
-        for region in (row.get("code_regions") or []):
+        for region in row.get("code_regions") or []:
             if region and "symbol_name" in region:
                 region["symbol"] = region.pop("symbol_name")
     for row in rows:
         spans = row.pop("source_spans", None) or []
         description = row.get("description", "")
-        real_spans = [
-            s for s in spans
-            if s and s.get("text") and s.get("text") != description
-        ]
+        real_spans = [s for s in spans if s and s.get("text") and s.get("text") != description]
         first_span = real_spans[0] if real_spans else None
         row["source_excerpt"] = (first_span.get("text") if first_span else "") or ""
         if not row.get("meeting_date"):
@@ -253,15 +251,12 @@ async def search_by_bm25(
         ca = row.pop("created_at", None)
         row.setdefault("ingested_at", str(ca)[:24] if ca else "")
         row["confidence"] = round(1.0 - (i / max(total, 1)) * 0.4, 2)
-        for region in (row.get("code_regions") or []):
+        for region in row.get("code_regions") or []:
             if region and "symbol_name" in region:
                 region["symbol"] = region.pop("symbol_name")
         spans = row.pop("source_spans", None) or []
         description = row.get("description", "")
-        real_spans = [
-            s for s in spans
-            if s and s.get("text") and s.get("text") != description
-        ]
+        real_spans = [s for s in spans if s and s.get("text") and s.get("text") != description]
         first_span = real_spans[0] if real_spans else None
         row["source_excerpt"] = (first_span.get("text") if first_span else "") or ""
         row["meeting_date"] = (first_span.get("meeting_date") if first_span else "") or ""
@@ -376,7 +371,7 @@ async def get_decisions_for_file(
             "purpose": region_row.get("purpose", ""),
             "content_hash": region_row.get("content_hash", ""),
         }
-        for decision in (region_row.get("decisions") or []):
+        for decision in region_row.get("decisions") or []:
             if decision is None:
                 continue
             did = str(decision.get("id", ""))
@@ -384,19 +379,21 @@ async def get_decisions_for_file(
                 continue
             seen_decision_ids.add(did)
             decision_id_set.add(did)
-            results.append({
-                "decision_id": did,
-                "description": decision.get("description", ""),
-                "source_type": decision.get("source_type", ""),
-                "source_ref": decision.get("source_ref", ""),
-                "source_excerpt": "",
-                "meeting_date": "",
-                "speaker": "",
-                "ingested_at": str(decision.get("created_at", "")),
-                "status": decision.get("status", "ungrounded"),
-                "signoff": decision.get("signoff"),
-                "code_region": region,
-            })
+            results.append(
+                {
+                    "decision_id": did,
+                    "description": decision.get("description", ""),
+                    "source_type": decision.get("source_type", ""),
+                    "source_ref": decision.get("source_ref", ""),
+                    "source_excerpt": "",
+                    "meeting_date": "",
+                    "speaker": "",
+                    "ingested_at": str(decision.get("created_at", "")),
+                    "status": decision.get("status", "ungrounded"),
+                    "signoff": decision.get("signoff"),
+                    "code_region": region,
+                }
+            )
 
     # Backfill source_excerpt + meeting_date via yields reverse edge
     if decision_id_set:
@@ -412,14 +409,11 @@ async def get_decisions_for_file(
         )
         excerpt_by_decision: dict[str, tuple[str, str]] = {}
         desc_by_decision = {e["decision_id"]: e.get("description", "") for e in results}
-        for r in (excerpt_rows or []):
+        for r in excerpt_rows or []:
             did = str(r.get("decision_id", ""))
             desc = desc_by_decision.get(did, "")
             spans = r.get("source_spans") or []
-            real_spans = [
-                s for s in spans
-                if s and s.get("text") and s.get("text") != desc
-            ]
+            real_spans = [s for s in spans if s and s.get("text") and s.get("text") != desc]
             first = real_spans[0] if real_spans else None
             if first:
                 excerpt_by_decision[did] = (
@@ -486,7 +480,7 @@ async def get_decisions_for_files(
             "purpose": region_row.get("purpose", ""),
             "content_hash": region_row.get("content_hash", ""),
         }
-        for decision in (region_row.get("decisions") or []):
+        for decision in region_row.get("decisions") or []:
             if decision is None:
                 continue
             did = str(decision.get("id", ""))
@@ -494,18 +488,20 @@ async def get_decisions_for_files(
                 continue
             seen_decision_ids.add(did)
             decision_id_set.add(did)
-            results.append({
-                "decision_id": did,
-                "description": decision.get("description", ""),
-                "source_type": decision.get("source_type", ""),
-                "source_ref": decision.get("source_ref", ""),
-                "source_excerpt": "",
-                "meeting_date": "",
-                "ingested_at": str(decision.get("created_at", "")),
-                "status": decision.get("status", "ungrounded"),
-                "signoff": decision.get("signoff"),
-                "code_region": region,
-            })
+            results.append(
+                {
+                    "decision_id": did,
+                    "description": decision.get("description", ""),
+                    "source_type": decision.get("source_type", ""),
+                    "source_ref": decision.get("source_ref", ""),
+                    "source_excerpt": "",
+                    "meeting_date": "",
+                    "ingested_at": str(decision.get("created_at", "")),
+                    "status": decision.get("status", "ungrounded"),
+                    "signoff": decision.get("signoff"),
+                    "code_region": region,
+                }
+            )
 
     # Backfill source_excerpt + meeting_date
     if decision_id_set:
@@ -521,14 +517,11 @@ async def get_decisions_for_files(
         )
         desc_by_decision = {e["decision_id"]: e.get("description", "") for e in results}
         excerpt_by_decision: dict[str, tuple[str, str]] = {}
-        for r in (excerpt_rows or []):
+        for r in excerpt_rows or []:
             did = str(r.get("decision_id", ""))
             desc = desc_by_decision.get(did, "")
             spans = r.get("source_spans") or []
-            real_spans = [
-                s for s in spans
-                if s and s.get("text") and s.get("text") != desc
-            ]
+            real_spans = [s for s in spans if s and s.get("text") and s.get("text") != desc]
             first = real_spans[0] if real_spans else None
             if first:
                 excerpt_by_decision[did] = (
@@ -710,9 +703,13 @@ async def upsert_code_region(
         WHERE file_path = $file_path AND symbol_name = $symbol_name
         """,
         {
-            "file_path": file_path, "symbol_name": symbol_name,
-            "start_line": start_line, "end_line": end_line,
-            "purpose": purpose, "repo": repo, "content_hash": content_hash,
+            "file_path": file_path,
+            "symbol_name": symbol_name,
+            "start_line": start_line,
+            "end_line": end_line,
+            "purpose": purpose,
+            "repo": repo,
+            "content_hash": content_hash,
         },
     )
     if rows:
@@ -949,6 +946,7 @@ async def update_decision_status(
 # stable across authors and machines, so it's the only id safe to ship
 # across the JSONL event log.
 
+
 async def get_canonical_id(
     client: LedgerClient,
     decision_id: str,
@@ -1389,13 +1387,17 @@ async def search_context_pending_by_text(
     total = len(rows)
     for i, row in enumerate(rows):
         signoff = row.get("signoff")
-        if not (signoff and isinstance(signoff, dict) and signoff.get("state") == "context_pending"):
+        if not (
+            signoff and isinstance(signoff, dict) and signoff.get("state") == "context_pending"
+        ):
             continue
-        results.append({
-            "decision_id": row.get("decision_id", ""),
-            "description": row.get("description", ""),
-            "overlap_score": round(1.0 - (i / max(total, 1)) * 0.4, 2),
-        })
+        results.append(
+            {
+                "decision_id": row.get("decision_id", ""),
+                "description": row.get("description", ""),
+                "overlap_score": round(1.0 - (i / max(total, 1)) * 0.4, 2),
+            }
+        )
         if len(results) >= top_k:
             break
     return results
@@ -1471,7 +1473,7 @@ async def get_context_for_ready_decisions(
 # shape and raises ``LedgerError`` on mismatch — a single choke point
 # per call instead of trusting upstream callers.
 
-import re as _re
+import re as _re  # noqa: E402
 
 _RECORD_ID_RE = _re.compile(r"^[A-Za-z_][A-Za-z0-9_]*:[A-Za-z0-9_\-]+$")
 
@@ -1514,8 +1516,10 @@ async def upsert_code_subject(
         WHERE kind = $kind AND canonical_name = $name
         """,
         {
-            "kind": kind, "name": canonical_name,
-            "repo_ref": repo_ref, "conf": current_confidence,
+            "kind": kind,
+            "name": canonical_name,
+            "repo_ref": repo_ref,
+            "conf": current_confidence,
         },
     )
     if rows:
@@ -1524,8 +1528,10 @@ async def upsert_code_subject(
         "CREATE code_subject SET kind=$kind, canonical_name=$name, "
         "repo_ref=$repo_ref, current_confidence=$conf",
         {
-            "kind": kind, "name": canonical_name,
-            "repo_ref": repo_ref, "conf": current_confidence,
+            "kind": kind,
+            "name": canonical_name,
+            "repo_ref": repo_ref,
+            "conf": current_confidence,
         },
     )
     return str(rows[0].get("id", "")) if rows else ""
@@ -1608,8 +1614,7 @@ async def relate_has_identity(
     siid = _validated_record_id(subject_identity_id, "subject_identity")
     await _execute_idempotent_edge(
         client,
-        f"RELATE {csid}->has_identity->{siid} "
-        "SET confidence=$c, created_at=time::now()",
+        f"RELATE {csid}->has_identity->{siid} SET confidence=$c, created_at=time::now()",
         {"c": confidence},
     )
 
@@ -1625,8 +1630,7 @@ async def link_decision_to_subject(
     csid = _validated_record_id(code_subject_id, "code_subject")
     await _execute_idempotent_edge(
         client,
-        f"RELATE {did}->about->{csid} "
-        "SET confidence=$c, created_at=time::now()",
+        f"RELATE {did}->about->{csid} SET confidence=$c, created_at=time::now()",
         {"c": confidence},
     )
 
diff --git a/ledger/schema.py b/ledger/schema.py
index f7d475f6..0a417bd6 100644
--- a/ledger/schema.py
+++ b/ledger/schema.py
@@ -14,6 +14,7 @@
 from __future__ import annotations
 
 import logging
+from datetime import UTC
 
 from .client import LedgerClient, LedgerError
 
@@ -38,7 +39,7 @@
     7: "0.8.0",
     8: "0.9.0",
     9: "0.9.3",
-    11: "0.11.0",   # placeholder; release-eng pins final value at PR merge
+    11: "0.11.0",  # placeholder; release-eng pins final value at PR merge
 }
 
 # Migrations that drop or recreate tables/data. These are never auto-applied;
@@ -72,16 +73,14 @@ class SchemaVersionTooNew(LedgerError):
 # Core tables
 _TABLES = [
     # ── Decision tier ────────────────────────────────────────────────────
-
     # input_span — raw verbatim text excerpt from a meeting, PRD, Slack, or
     # implementation-time rationale. "What was said / written."
     # text is required — no DEFAULT. A span without verbatim text is rejected
     # at the ingest contract boundary (IngestDecision.source_excerpt must be
     # non-empty). See v0.5.0 plan §Core Principle.
     "DEFINE TABLE input_span SCHEMAFULL",
-    "DEFINE FIELD text           ON input_span TYPE string "
-    "ASSERT string::len($value) > 0",
-    "DEFINE FIELD source_type    ON input_span TYPE string",       # transcript | notion | slack | document | manual | implementation_choice
+    "DEFINE FIELD text           ON input_span TYPE string ASSERT string::len($value) > 0",
+    "DEFINE FIELD source_type    ON input_span TYPE string",  # transcript | notion | slack | document | manual | implementation_choice
     "DEFINE FIELD source_ref     ON input_span TYPE string DEFAULT ''",  # meeting ID, page URL, etc.
     "DEFINE FIELD speakers       ON input_span TYPE array<string> DEFAULT []",
     "DEFINE FIELD meeting_date   ON input_span TYPE string DEFAULT ''",
@@ -89,7 +88,6 @@ class SchemaVersionTooNew(LedgerError):
     "DEFINE INDEX idx_input_span_ref   ON input_span FIELDS source_type, source_ref",
     # Dedup: same excerpt from same source is the same span
     "DEFINE INDEX idx_input_span_dedup ON input_span FIELDS source_type, source_ref, text UNIQUE",
-
     # decision — extracted decision / requirement. "What was decided."
     # Denormalized source fields (source_type, source_ref, speakers, meeting_date)
     # are kept for query speed; they mirror the linked input_span but are never
@@ -122,9 +120,7 @@ class SchemaVersionTooNew(LedgerError):
     "SEARCH ANALYZER biz_analyzer BM25(1.2, 0.75) HIGHLIGHTS",
     # Powers the "awaiting signoff" PM dashboard queue
     "DEFINE INDEX idx_decision_signoff ON decision FIELDS signoff",
-
     # ── Shared / unchanged ──────────────────────────────────────────────
-
     # symbol — a named code entity (function, class, file). Retrieval-tier only.
     "DEFINE TABLE symbol SCHEMAFULL",
     "DEFINE FIELD name           ON symbol TYPE string",
@@ -134,12 +130,11 @@ class SchemaVersionTooNew(LedgerError):
     "DEFINE FIELD hit_count      ON symbol TYPE int DEFAULT 0",
     "DEFINE INDEX idx_sym_name   ON symbol FIELDS name SEARCH ANALYZER code_analyzer BM25(1.2, 0.75)",
     "DEFINE INDEX idx_sym_file   ON symbol FIELDS file_path",
-
     # code_region — a specific span within a file. Shared between the two tiers:
     # decision tier addresses it via binds_to; retrieval tier via locates.
     "DEFINE TABLE code_region SCHEMAFULL CHANGEFEED 30d INCLUDE ORIGINAL",
     "DEFINE FIELD file_path      ON code_region TYPE string",
-    "DEFINE FIELD symbol_name    ON code_region TYPE string",   # display-only metadata, not a graph edge target
+    "DEFINE FIELD symbol_name    ON code_region TYPE string",  # display-only metadata, not a graph edge target
     "DEFINE FIELD start_line     ON code_region TYPE int",
     "DEFINE FIELD end_line       ON code_region TYPE int",
     "DEFINE FIELD purpose        ON code_region TYPE string DEFAULT ''",
@@ -148,7 +143,6 @@ class SchemaVersionTooNew(LedgerError):
     "DEFINE FIELD content_hash   ON code_region TYPE string DEFAULT ''",
     "DEFINE INDEX idx_region_sym  ON code_region FIELDS symbol_name",
     "DEFINE INDEX idx_region_file ON code_region FIELDS repo, file_path",
-
     # vocab_cache — grounding reuse cache for query→code_region lookups
     "DEFINE TABLE vocab_cache SCHEMAFULL",
     "DEFINE FIELD query_text     ON vocab_cache TYPE string",
@@ -158,14 +152,12 @@ class SchemaVersionTooNew(LedgerError):
     "DEFINE FIELD last_hit       ON vocab_cache TYPE datetime DEFAULT time::now()",
     "DEFINE INDEX idx_vocab_query ON vocab_cache FIELDS query_text SEARCH ANALYZER biz_analyzer BM25(1.2, 0.75)",
     "DEFINE INDEX idx_vocab_repo  ON vocab_cache FIELDS repo",
-
     # ledger_sync — idempotency cursor (last synced commit per repo)
     "DEFINE TABLE ledger_sync SCHEMAFULL",
     "DEFINE FIELD repo               ON ledger_sync TYPE string",
     "DEFINE FIELD last_synced_commit ON ledger_sync TYPE string",
     "DEFINE FIELD synced_at          ON ledger_sync TYPE datetime DEFAULT time::now()",
     "DEFINE INDEX idx_sync_repo ON ledger_sync FIELDS repo UNIQUE",
-
     # source_cursor — upstream ingestion checkpoint per source stream
     "DEFINE TABLE source_cursor SCHEMAFULL",
     "DEFINE FIELD repo            ON source_cursor TYPE string",
@@ -177,13 +169,12 @@ class SchemaVersionTooNew(LedgerError):
     "DEFINE FIELD status          ON source_cursor TYPE string DEFAULT 'ok'",
     "DEFINE FIELD error           ON source_cursor TYPE string DEFAULT ''",
     "DEFINE INDEX idx_source_cursor ON source_cursor FIELDS repo, source_type, source_scope UNIQUE",
-
     # compliance_check — LLM verification cache.
     # Cache key: (decision_id, region_id, content_hash) — one verdict per code shape.
     # pruned=true means the caller said "not_relevant" — retrieval mistake, binds_to
     # edge has been deleted. Row kept for audit trail.
     "DEFINE TABLE compliance_check SCHEMAFULL",
-    "DEFINE FIELD decision_id  ON compliance_check TYPE string",   # renamed from intent_id
+    "DEFINE FIELD decision_id  ON compliance_check TYPE string",  # renamed from intent_id
     "DEFINE FIELD region_id    ON compliance_check TYPE string",
     "DEFINE FIELD content_hash ON compliance_check TYPE string",
     "DEFINE FIELD commit_hash  ON compliance_check TYPE string DEFAULT ''",
@@ -203,7 +194,6 @@ class SchemaVersionTooNew(LedgerError):
     "DEFINE INDEX idx_cc_region    ON compliance_check FIELDS region_id",
     "DEFINE INDEX idx_cc_commit    ON compliance_check FIELDS commit_hash",
     "DEFINE INDEX idx_cc_ephemeral ON compliance_check FIELDS ephemeral",
-
     # graph_proposal — AI-generated edge proposals for human review.
     # from_id / to_id are TYPE string (not TYPE record) because this table can
     # link across different node types. Traverse via type::thing($from_id).
@@ -220,12 +210,10 @@ class SchemaVersionTooNew(LedgerError):
     "DEFINE FIELD session_id    ON graph_proposal TYPE string DEFAULT ''",
     "DEFINE FIELD created_at    ON graph_proposal TYPE datetime DEFAULT time::now()",
     "DEFINE FIELD reviewed_at   ON graph_proposal TYPE option<datetime> DEFAULT NONE",
-
     # ── CodeGenome tier (v11, additive — Phase 1+2 / #59) ───────────────
     # All writes are gated by codegenome.write_identity_records=True at the
     # handler boundary. Tables exist unconditionally so toggling the flag
     # mid-deployment does not require a migration.
-
     # code_subject — a conceptual code target (function, class, module…)
     # that can survive movement across files. Distinct from `symbol`,
     # which is keyed on name+kind at one point in time.
@@ -233,13 +221,10 @@ class SchemaVersionTooNew(LedgerError):
     "DEFINE FIELD kind               ON code_subject TYPE string",
     "DEFINE FIELD canonical_name     ON code_subject TYPE string",
     "DEFINE FIELD repo_ref           ON code_subject TYPE option<string>",
-    "DEFINE FIELD current_confidence ON code_subject TYPE float "
-    "ASSERT $value >= 0 AND $value <= 1",
+    "DEFINE FIELD current_confidence ON code_subject TYPE float ASSERT $value >= 0 AND $value <= 1",
     "DEFINE FIELD created_at         ON code_subject TYPE datetime DEFAULT time::now()",
     "DEFINE FIELD updated_at         ON code_subject TYPE datetime DEFAULT time::now()",
-    "DEFINE INDEX idx_code_subject_canonical "
-    "ON code_subject FIELDS kind, canonical_name UNIQUE",
-
+    "DEFINE INDEX idx_code_subject_canonical ON code_subject FIELDS kind, canonical_name UNIQUE",
     # subject_identity — durable fingerprint for one observation of a
     # code_subject. Phase 3 (#60) will add a supersedes edge between
     # identities; not defined yet.
@@ -255,7 +240,6 @@ class SchemaVersionTooNew(LedgerError):
     "DEFINE FIELD model_version        ON subject_identity TYPE string",
     "DEFINE FIELD created_at           ON subject_identity TYPE datetime DEFAULT time::now()",
     "DEFINE INDEX idx_subject_identity_address ON subject_identity FIELDS address UNIQUE",
-
     # subject_version — concrete location/symbol observation at one
     # repo_ref. Phase 3 (#60) will write versions when a continuity match
     # resolves a relocation; Phase 1+2 only defines the table (foundation
@@ -291,27 +275,23 @@ class SchemaVersionTooNew(LedgerError):
     "DEFINE TABLE yields SCHEMAFULL TYPE RELATION IN input_span OUT decision",
     "DEFINE FIELD created_at ON yields TYPE datetime DEFAULT time::now()",
     "DEFINE INDEX idx_yields_unique ON yields FIELDS in, out UNIQUE",
-
     # decision → code_region (direct binding — decision tier only)
     "DEFINE TABLE binds_to SCHEMAFULL TYPE RELATION IN decision OUT code_region",
     "DEFINE FIELD confidence ON binds_to TYPE float ASSERT $value >= 0 AND $value <= 1",
     "DEFINE FIELD provenance ON binds_to TYPE object DEFAULT {}",
     "DEFINE FIELD created_at ON binds_to TYPE datetime DEFAULT time::now()",
     "DEFINE INDEX idx_binds_to_unique ON binds_to FIELDS in, out UNIQUE",
-
     # symbol → code_region (retrieval tier — BM25 / graph / future embeddings)
     "DEFINE TABLE locates SCHEMAFULL TYPE RELATION IN symbol OUT code_region",
     "DEFINE FIELD confidence ON locates TYPE float ASSERT $value >= 0 AND $value <= 1",
     "DEFINE FIELD created_at ON locates TYPE datetime DEFAULT time::now()",
     "DEFINE INDEX idx_locates_unique ON locates FIELDS in, out UNIQUE",
-
     # decision → decision (human-confirmed supersession — v0.8.0 HITL)
     "DEFINE TABLE supersedes SCHEMAFULL TYPE RELATION IN decision OUT decision",
     "DEFINE FIELD confidence  ON supersedes TYPE float",
     "DEFINE FIELD reason      ON supersedes TYPE string DEFAULT ''",
     "DEFINE FIELD created_at  ON supersedes TYPE datetime DEFAULT time::now()",
     "DEFINE INDEX idx_supersedes_unique ON supersedes FIELDS in, out UNIQUE",
-
     # input_span → decision (human-confirmed context provision — v0.8.0 HITL)
     "DEFINE TABLE context_for SCHEMAFULL TYPE RELATION IN input_span OUT decision",
     "DEFINE FIELD relevance_score ON context_for TYPE float",
@@ -320,34 +300,26 @@ class SchemaVersionTooNew(LedgerError):
     "ASSERT $value IN ['proposed', 'confirmed', 'rejected'] DEFAULT 'proposed'",
     "DEFINE FIELD created_at      ON context_for TYPE datetime DEFAULT time::now()",
     "DEFINE INDEX idx_ctx_unique ON context_for FIELDS in, out UNIQUE",
-
     # code_region → code_region (structural dependency — unchanged)
     "DEFINE TABLE depends_on SCHEMAFULL TYPE RELATION IN code_region OUT code_region",
     "DEFINE FIELD edge_type  ON depends_on TYPE string",
     "DEFINE FIELD created_at ON depends_on TYPE datetime DEFAULT time::now()",
     "DEFINE INDEX idx_depends_on_unique ON depends_on FIELDS in, out, edge_type UNIQUE",
-
     # ── CodeGenome edges (v11, additive — Phase 1+2 / #59) ──────────────
-
     # code_subject → has_identity → subject_identity
     "DEFINE TABLE has_identity SCHEMAFULL TYPE RELATION IN code_subject OUT subject_identity",
-    "DEFINE FIELD confidence ON has_identity TYPE float "
-    "ASSERT $value >= 0 AND $value <= 1",
+    "DEFINE FIELD confidence ON has_identity TYPE float ASSERT $value >= 0 AND $value <= 1",
     "DEFINE FIELD created_at ON has_identity TYPE datetime DEFAULT time::now()",
     "DEFINE INDEX idx_has_identity_unique ON has_identity FIELDS in, out UNIQUE",
-
     # code_subject → has_version → subject_version
     "DEFINE TABLE has_version SCHEMAFULL TYPE RELATION IN code_subject OUT subject_version",
-    "DEFINE FIELD confidence ON has_version TYPE float "
-    "ASSERT $value >= 0 AND $value <= 1",
+    "DEFINE FIELD confidence ON has_version TYPE float ASSERT $value >= 0 AND $value <= 1",
     "DEFINE FIELD created_at ON has_version TYPE datetime DEFAULT time::now()",
     "DEFINE INDEX idx_has_version_unique ON has_version FIELDS in, out UNIQUE",
-
     # decision → about → code_subject (used by find_subject_identities_for_decision
     # to walk decision → subject → identity in two hops)
     "DEFINE TABLE about SCHEMAFULL TYPE RELATION IN decision OUT code_subject",
-    "DEFINE FIELD confidence ON about TYPE float "
-    "ASSERT $value >= 0 AND $value <= 1",
+    "DEFINE FIELD confidence ON about TYPE float ASSERT $value >= 0 AND $value <= 1",
     "DEFINE FIELD created_at ON about TYPE datetime DEFAULT time::now()",
     "DEFINE INDEX idx_about_unique ON about FIELDS in, out UNIQUE",
 ]
@@ -368,9 +340,15 @@ def _with_overwrite(sql: str) -> str:
     the current field constraints (ASSERT clauses, DEFAULT values, TYPE) even
     when the field already exists in the DB.
     """
-    for keyword in ("DEFINE TABLE", "DEFINE FIELD", "DEFINE INDEX", "DEFINE ANALYZER", "DEFINE EVENT"):
+    for keyword in (
+        "DEFINE TABLE",
+        "DEFINE FIELD",
+        "DEFINE INDEX",
+        "DEFINE ANALYZER",
+        "DEFINE EVENT",
+    ):
         if sql.upper().startswith(keyword) and "OVERWRITE" not in sql.upper():
-            return keyword + " OVERWRITE" + sql[len(keyword):]
+            return keyword + " OVERWRITE" + sql[len(keyword) :]
     return sql
 
 
@@ -402,7 +380,7 @@ async def init_schema(client: LedgerClient) -> None:
     clauses, DEFAULT values, TYPE) are always brought up to the current schema
     definition — even when running against a DB created by an older version.
     """
-    for sql in (_ANALYZERS + _TABLES + _EDGES + _META):
+    for sql in _ANALYZERS + _TABLES + _EDGES + _META:
         sql = sql.strip()
         if sql:
             await _execute_define_idempotent(client, _with_overwrite(sql))
@@ -410,6 +388,7 @@ async def init_schema(client: LedgerClient) -> None:
 
 # ── Migrations ──────────────────────────────────────────────────────────
 
+
 async def _migrate_v4_to_v5(client: LedgerClient) -> None:
     """v4 → v5: Remove stale v3-era yields edges and deduplicate.
 
@@ -431,7 +410,7 @@ async def _migrate_v4_to_v5(client: LedgerClient) -> None:
             "WHERE string::starts_with(type::string(in), 'source_span:') "
             "   OR string::starts_with(type::string(out), 'intent:')"
         )
-        for row in (stale or []):
+        for row in stale or []:
             try:
                 await client.execute(f"DELETE {row['id']}")
             except Exception:
@@ -448,7 +427,7 @@ async def _migrate_v4_to_v5(client: LedgerClient) -> None:
         all_yields = await client.query("SELECT id, in, out FROM yields")
         seen: set[tuple[str, str]] = set()
         removed = 0
-        for row in (all_yields or []):
+        for row in all_yields or []:
             key = (str(row.get("in", "")), str(row.get("out", "")))
             if key in seen:
                 try:
@@ -482,16 +461,14 @@ async def _migrate_v5_to_v6(client: LedgerClient) -> None:
 
     New ingests after v0.7.0 write signoff = {state:'proposed', ...} by default.
     """
-    from datetime import datetime, timezone
+    from datetime import datetime
 
-    now_iso = datetime.now(timezone.utc).isoformat()
+    now_iso = datetime.now(UTC).isoformat()
 
     try:
-        all_decisions = await client.query(
-            "SELECT id, product_signoff FROM decision"
-        )
+        all_decisions = await client.query("SELECT id, product_signoff FROM decision")
         migrated = 0
-        for row in (all_decisions or []):
+        for row in all_decisions or []:
             decision_id = str(row.get("id", ""))
             old_signoff = row.get("product_signoff")
 
@@ -560,7 +537,6 @@ async def _migrate_v6_to_v7(client: LedgerClient) -> None:
         "DEFINE FIELD reason      ON supersedes TYPE string DEFAULT ''",
         "DEFINE FIELD created_at  ON supersedes TYPE datetime DEFAULT time::now()",
         "DEFINE INDEX idx_supersedes_unique ON supersedes FIELDS in, out UNIQUE",
-
         "DEFINE TABLE context_for SCHEMAFULL TYPE RELATION IN input_span OUT decision",
         "DEFINE FIELD relevance_score ON context_for TYPE float",
         "DEFINE FIELD reason          ON context_for TYPE string DEFAULT ''",
@@ -568,7 +544,6 @@ async def _migrate_v6_to_v7(client: LedgerClient) -> None:
         "ASSERT $value IN ['proposed', 'confirmed', 'rejected'] DEFAULT 'proposed'",
         "DEFINE FIELD created_at      ON context_for TYPE datetime DEFAULT time::now()",
         "DEFINE INDEX idx_ctx_unique ON context_for FIELDS in, out UNIQUE",
-
         # Proposal infrastructure (AI does not write here yet)
         "DEFINE TABLE graph_proposal SCHEMAFULL",
         "DEFINE FIELD proposal_type ON graph_proposal TYPE string "
@@ -582,7 +557,6 @@ async def _migrate_v6_to_v7(client: LedgerClient) -> None:
         "DEFINE FIELD session_id    ON graph_proposal TYPE string DEFAULT ''",
         "DEFINE FIELD created_at    ON graph_proposal TYPE datetime DEFAULT time::now()",
         "DEFINE FIELD reviewed_at   ON graph_proposal TYPE option<datetime> DEFAULT NONE",
-
         # Expanded status ASSERT (v6→v7 era; narrowed again in v10)
         "DEFINE FIELD status ON decision TYPE string DEFAULT 'ungrounded' "
         "ASSERT $value IN ['reflected', 'drifted', 'pending', 'ungrounded', "
@@ -609,7 +583,9 @@ async def _migrate_v7_to_v8(client: LedgerClient) -> None:
 
     try:
         await client.execute("UPDATE compliance_check SET ephemeral = false WHERE ephemeral = NONE")
-        logger.info("[migration] v7 → v8: backfilled compliance_check.ephemeral = false on existing rows")
+        logger.info(
+            "[migration] v7 → v8: backfilled compliance_check.ephemeral = false on existing rows"
+        )
     except Exception as exc:
         logger.warning("[migration] v7 → v8: backfill failed (non-fatal): %s", exc)
 
@@ -651,15 +627,16 @@ async def _migrate_v9_to_v10(client: LedgerClient) -> None:
        code-compliance status will be re-derived on the next drift sweep.
     3. Tighten the ASSERT constraint on the status field.
     """
-    from datetime import datetime, timezone
-    _now = datetime.now(timezone.utc).isoformat()
+    from datetime import datetime
+
+    _now = datetime.now(UTC).isoformat()
 
     # Step 1: superseded decisions — move superseded into signoff
     superseded_rows = await client.query(
         "SELECT type::string(id) AS id, signoff FROM decision WHERE status = 'superseded'"
     )
     migrated_superseded = 0
-    for row in (superseded_rows or []):
+    for row in superseded_rows or []:
         decision_id = row.get("id", "")
         existing_signoff = row.get("signoff") or {}
         if not decision_id:
@@ -684,8 +661,7 @@ async def _migrate_v9_to_v10(client: LedgerClient) -> None:
     # (their signoff already carries the right state; the status field was a
     # projection artifact of the old project_decision_status short-circuits)
     await client.execute(
-        "UPDATE decision SET status = 'ungrounded' "
-        "WHERE status IN ['proposal', 'context_pending']"
+        "UPDATE decision SET status = 'ungrounded' WHERE status IN ['proposal', 'context_pending']"
     )
 
     # Step 3: tighten ASSERT
@@ -723,7 +699,6 @@ async def _migrate_v10_to_v11(client: LedgerClient) -> None:
         "DEFINE FIELD updated_at         ON code_subject TYPE datetime DEFAULT time::now()",
         "DEFINE INDEX idx_code_subject_canonical "
         "ON code_subject FIELDS kind, canonical_name UNIQUE",
-
         "DEFINE TABLE subject_identity SCHEMAFULL",
         "DEFINE FIELD address              ON subject_identity TYPE string",
         "DEFINE FIELD identity_type        ON subject_identity TYPE string",
@@ -735,9 +710,7 @@ async def _migrate_v10_to_v11(client: LedgerClient) -> None:
         "ASSERT $value >= 0 AND $value <= 1",
         "DEFINE FIELD model_version        ON subject_identity TYPE string",
         "DEFINE FIELD created_at           ON subject_identity TYPE datetime DEFAULT time::now()",
-        "DEFINE INDEX idx_subject_identity_address "
-        "ON subject_identity FIELDS address UNIQUE",
-
+        "DEFINE INDEX idx_subject_identity_address ON subject_identity FIELDS address UNIQUE",
         "DEFINE TABLE subject_version SCHEMAFULL",
         "DEFINE FIELD repo_ref       ON subject_version TYPE string",
         "DEFINE FIELD file_path      ON subject_version TYPE string",
@@ -750,23 +723,17 @@ async def _migrate_v10_to_v11(client: LedgerClient) -> None:
         "DEFINE FIELD created_at     ON subject_version TYPE datetime DEFAULT time::now()",
         "DEFINE INDEX idx_subject_version_loc "
         "ON subject_version FIELDS repo_ref, file_path, start_line, end_line",
-
         # Edges
         "DEFINE TABLE has_identity SCHEMAFULL TYPE RELATION IN code_subject OUT subject_identity",
-        "DEFINE FIELD confidence ON has_identity TYPE float "
-        "ASSERT $value >= 0 AND $value <= 1",
+        "DEFINE FIELD confidence ON has_identity TYPE float ASSERT $value >= 0 AND $value <= 1",
         "DEFINE FIELD created_at ON has_identity TYPE datetime DEFAULT time::now()",
         "DEFINE INDEX idx_has_identity_unique ON has_identity FIELDS in, out UNIQUE",
-
         "DEFINE TABLE has_version SCHEMAFULL TYPE RELATION IN code_subject OUT subject_version",
-        "DEFINE FIELD confidence ON has_version TYPE float "
-        "ASSERT $value >= 0 AND $value <= 1",
+        "DEFINE FIELD confidence ON has_version TYPE float ASSERT $value >= 0 AND $value <= 1",
         "DEFINE FIELD created_at ON has_version TYPE datetime DEFAULT time::now()",
         "DEFINE INDEX idx_has_version_unique ON has_version FIELDS in, out UNIQUE",
-
         "DEFINE TABLE about SCHEMAFULL TYPE RELATION IN decision OUT code_subject",
-        "DEFINE FIELD confidence ON about TYPE float "
-        "ASSERT $value >= 0 AND $value <= 1",
+        "DEFINE FIELD confidence ON about TYPE float ASSERT $value >= 0 AND $value <= 1",
         "DEFINE FIELD created_at ON about TYPE datetime DEFAULT time::now()",
         "DEFINE INDEX idx_about_unique ON about FIELDS in, out UNIQUE",
     ]
@@ -831,7 +798,9 @@ async def migrate(client: LedgerClient, allow_destructive: bool = False) -> None
 
     logger.info(
         "[migration] Schema version %d → %d (%d migration(s) to apply)",
-        current, SCHEMA_VERSION, SCHEMA_VERSION - current,
+        current,
+        SCHEMA_VERSION,
+        SCHEMA_VERSION - current,
     )
 
     for target_version in range(current + 1, SCHEMA_VERSION + 1):
diff --git a/ledger/status.py b/ledger/status.py
index c25faf88..91c64546 100644
--- a/ledger/status.py
+++ b/ledger/status.py
@@ -44,7 +44,10 @@ def resolve_symbol_lines(
         try:
             result = subprocess.run(
                 ["git", "show", f"{ref}:{file_path}"],
-                cwd=abs_repo, capture_output=True, text=True, timeout=10,
+                cwd=abs_repo,
+                capture_output=True,
+                text=True,
+                timeout=10,
             )
             if result.returncode != 0:
                 return None
@@ -57,9 +60,15 @@ def resolve_symbol_lines(
 
         ext = Path(file_path).suffix
         lang_map = {
-            ".py": "python", ".js": "javascript", ".jsx": "javascript",
-            ".ts": "typescript", ".tsx": "typescript", ".java": "java",
-            ".go": "go", ".rs": "rust", ".cs": "csharp",
+            ".py": "python",
+            ".js": "javascript",
+            ".jsx": "javascript",
+            ".ts": "typescript",
+            ".tsx": "typescript",
+            ".java": "java",
+            ".go": "go",
+            ".rs": "rust",
+            ".cs": "csharp",
         }
         lang = lang_map.get(ext)
         if lang is None:
@@ -67,19 +76,33 @@ def resolve_symbol_lines(
 
         symbols = extract_symbols_from_content(content, lang, file_path)
         for sym in symbols:
-            name = getattr(sym, "name", None) or (sym.get("name") if isinstance(sym, dict) else None)
-            qname = getattr(sym, "qualified_name", None) or (sym.get("qualified_name") if isinstance(sym, dict) else None)
-            sl = getattr(sym, "start_line", None) or (sym.get("start_line") if isinstance(sym, dict) else None)
-            el = getattr(sym, "end_line", None) or (sym.get("end_line") if isinstance(sym, dict) else None)
+            name = getattr(sym, "name", None) or (
+                sym.get("name") if isinstance(sym, dict) else None
+            )
+            qname = getattr(sym, "qualified_name", None) or (
+                sym.get("qualified_name") if isinstance(sym, dict) else None
+            )
+            sl = getattr(sym, "start_line", None) or (
+                sym.get("start_line") if isinstance(sym, dict) else None
+            )
+            el = getattr(sym, "end_line", None) or (
+                sym.get("end_line") if isinstance(sym, dict) else None
+            )
             if name == symbol_name or qname == symbol_name:
                 return (sl, el)
 
         # Try fuzzy: symbol_name might be unqualified
         bare = symbol_name.split(".")[-1] if "." in symbol_name else symbol_name
         for sym in symbols:
-            name = getattr(sym, "name", None) or (sym.get("name") if isinstance(sym, dict) else None)
-            sl = getattr(sym, "start_line", None) or (sym.get("start_line") if isinstance(sym, dict) else None)
-            el = getattr(sym, "end_line", None) or (sym.get("end_line") if isinstance(sym, dict) else None)
+            name = getattr(sym, "name", None) or (
+                sym.get("name") if isinstance(sym, dict) else None
+            )
+            sl = getattr(sym, "start_line", None) or (
+                sym.get("start_line") if isinstance(sym, dict) else None
+            )
+            el = getattr(sym, "end_line", None) or (
+                sym.get("end_line") if isinstance(sym, dict) else None
+            )
             if name == bare:
                 return (sl, el)
 
@@ -159,12 +182,12 @@ def compute_content_hash(
     content = get_git_content(file_path, start_line, end_line, repo_path, ref)
     if content is None:
         return None
-    # Validate line range (warn but still hash — shorter file = drift signal)
-    line_count = len(content.splitlines())
     if start_line < 1 or end_line < start_line:
         logger.warning(
             "[status] Invalid range %d:%d for %s",
-            start_line, end_line, file_path,
+            start_line,
+            end_line,
+            file_path,
         )
         return None
     return hash_lines(content, start_line, end_line)
@@ -259,7 +282,9 @@ def get_changed_files_in_range(
         if result.returncode != 0:
             logger.warning(
                 "[status] git diff %s..%s failed: %s",
-                base_sha[:8], head_sha[:8], result.stderr[:200],
+                base_sha[:8],
+                head_sha[:8],
+                result.stderr[:200],
             )
             return None
         return [f.strip() for f in result.stdout.strip().splitlines() if f.strip()]
diff --git a/local_counters.py b/local_counters.py
index 7c8a1d8e..72b2e21a 100644
--- a/local_counters.py
+++ b/local_counters.py
@@ -23,11 +23,11 @@
 import json
 import logging
 import os
-import sys
 import threading
 from collections import Counter
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from pathlib import Path
+from typing import IO
 
 logger = logging.getLogger(__name__)
 
@@ -41,7 +41,7 @@ def _enabled() -> bool:
     return val not in _OFF_VALUES
 
 
-def _open_for_append_secure(path: Path) -> "os.PathLike":
+def _open_for_append_secure(path: Path) -> IO[bytes]:
     """Open the counters file with 0o600 mode on POSIX (user-only)."""
     flags = os.O_WRONLY | os.O_CREAT | os.O_APPEND
     fd = os.open(str(path), flags, 0o600)
@@ -57,7 +57,7 @@ def increment(tool_name: str, *, delta: int = 1) -> None:
         record = {
             "tool": tool_name,
             "delta": int(delta),
-            "ts": datetime.now(timezone.utc).isoformat(),
+            "ts": datetime.now(UTC).isoformat(),
         }
         line = json.dumps(record, separators=(",", ":")) + "\n"
         with _LOCK:
diff --git a/ports.py b/ports.py
index a446d94c..9ba65809 100644
--- a/ports.py
+++ b/ports.py
@@ -10,10 +10,9 @@
 
 from __future__ import annotations
 
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from typing import Protocol, runtime_checkable
 
-
 # ── Drift Analysis ──────────────────────────────────────────────────────
 
 
diff --git a/pyproject.toml b/pyproject.toml
index cfd6c93f..308194d2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,12 +48,86 @@ test = [
   "pytest>=8.0.0",
   "pytest-asyncio>=0.23.0",
   "tiktoken>=0.7.0,<1.0.0",
+  "ruff>=0.5.0",
+  "mypy>=1.10.0",
 ]
 
 [project.scripts]
 bicameral-mcp = "server:cli_main"
+bicameral-mcp-preflight-reminder = "scripts.hooks.preflight_reminder:main"
+bicameral-mcp-post-commit-sync-reminder = "scripts.hooks.post_commit_sync_reminder:main"
+bicameral-mcp-collision-capture-reminder = "scripts.hooks.post_preflight_capture_reminder:main"
 
 [tool.hatch.build.targets.wheel]
 packages = ["."]
-exclude = ["tests", "visual-plan", "mocks", "test-results"]
+exclude = [
+  "tests",
+  "visual-plan",
+  "mocks",
+  "test-results",
+  "docs/demos/**/*.mp4",
+]
 artifacts = ["skills/**/*.md", "skills/**/*.yaml"]
+
+[tool.ruff]
+line-length = 100
+target-version = "py311"
+extend-exclude = [
+  "test-results",
+  "visual-plan",
+  "mocks",
+  ".agent",
+  ".claude",
+]
+
+[tool.ruff.lint]
+select = ["E", "F", "W", "I", "B", "UP"]  # pyflakes + pycodestyle + isort + bugbear + pyupgrade
+ignore = ["E501"]  # line-length handled by formatter
+
+[tool.ruff.lint.per-file-ignores]
+# Test files often reference module-internal symbols imported via patching,
+# use intentional unused-locals for clarity, and use assert-style equality.
+# Day-one CI keeps tests/ lenient; tighten in follow-up cleanup PRs.
+"tests/**" = ["F401", "F811", "F821", "F841", "E712", "B017", "B904", "E402", "E731"]
+"scripts/**" = ["F401", "F841", "E402", "E731"]
+
+[tool.mypy]
+python_version = "3.11"
+ignore_missing_imports = true  # project depends on pydantic, mcp, surrealdb — many unstubbed
+warn_return_any = false
+strict_optional = true
+disable_error_code = ["import-untyped"]  # missing third-party stubs (e.g. PyYAML) — chip away in follow-ups
+exclude = [
+  "test-results/",
+  "visual-plan/",
+  "mocks/",
+  ".agent/",
+  ".claude/",
+  "build/",
+  "dist/",
+  # Tests/fixtures aren't part of the production type surface; tighten in follow-ups.
+  "^tests/",
+  "^scripts/",
+]
+
+# Day-one mypy: noisy modules suppressed wholesale to keep CI green.
+# Each entry below is a follow-up cleanup target. Track in a separate type-cleanup
+# project — do NOT remove entries here without first fixing the underlying errors.
+[[tool.mypy.overrides]]
+module = [
+  "server",
+  "setup_wizard",
+  "code_locator.indexing.cocoindex_pipeline",
+  "code_locator.indexing.symbol_extractor",
+  "adapters.code_locator",
+  "ledger.adapter",
+  "ledger.status",
+  "ledger.queries",
+  "ledger.schema",
+  "handlers.ratify",
+  "handlers.search_decisions",
+  "handlers.resolve_compliance",
+  "handlers.preflight",
+  "handlers.detect_drift",
+]
+ignore_errors = true
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/scripts/hooks/__init__.py b/scripts/hooks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/scripts/hooks/post_commit_sync_reminder.py b/scripts/hooks/post_commit_sync_reminder.py
new file mode 100644
index 00000000..ed610472
--- /dev/null
+++ b/scripts/hooks/post_commit_sync_reminder.py
@@ -0,0 +1,82 @@
+"""PostToolUse hook for the ``Bash`` tool — git write-op detector.
+
+When the agent runs ``git commit`` / ``git merge`` / ``git pull`` /
+``git rebase --continue``, inject a system-reminder telling the agent to
+call ``/bicameral:sync`` so the decision ledger picks up the new HEAD,
+runs compliance checks, and produces authoritative reflected/drifted
+verdicts before the next user turn.
+
+Replaces the plain-stdout one-liner ``_BICAMERAL_POST_COMMIT_COMMAND``
+that previously lived inline in ``setup_wizard.py``. Per Claude Code
+2.x hook docs (https://code.claude.com/docs/en/hooks), plain stdout
+from PostToolUse hooks is silently dropped to the debug log — only
+UserPromptSubmit / UserPromptExpansion / SessionStart treat raw stdout
+as agent-visible context. Symptom: the agent committed but never
+followed through to call ``link_commit`` / ``/bicameral:sync`` because
+the reminder never reached the model. Fix: emit the structured
+envelope ``{"hookSpecificOutput": {"hookEventName": "PostToolUse",
+"additionalContext": "..."}}``.
+
+The reminder text preserves the canonical ``"bicameral: new commit
+detected"`` phrase — the ``bicameral-sync`` skill watches for that
+exact prefix as one of its trigger signals.
+
+Errors are swallowed silently (exit 0, empty response) so a broken
+hook never blocks a user.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+
+BASH_TOOL_NAME = "Bash"
+
+# Substrings that mark a git write-op against HEAD that the agent should
+# follow up with /bicameral:sync. Exact phrasing matches the legacy
+# inline command's tuple so behavior is byte-identical except for the
+# stdout envelope.
+WRITE_OP_MARKERS: tuple[str, ...] = (
+    "git commit",
+    "git merge ",
+    "git pull",
+    "git rebase --continue",
+)
+
+REMINDER_TEXT = (
+    "bicameral: new commit detected — run /bicameral:sync to resolve "
+    "compliance and get authoritative reflected/drifted status"
+)
+
+
+def _is_git_write_op(command: str) -> bool:
+    return any(command.find(op) >= 0 for op in WRITE_OP_MARKERS)  # plain substring scan
+
+
+def main() -> int:
+    """Emit the sync-reminder envelope after a Bash git write-op.
+
+    Unparseable or unrelated events produce no output; the return value is always 0.
+    """
+    try:
+        event = json.load(sys.stdin)
+    except ValueError:  # json.JSONDecodeError is a ValueError subclass
+        return 0
+    # Ignore anything that is not a JSON object describing a Bash tool call.
+    if not isinstance(event, dict) or event.get("tool_name") != BASH_TOOL_NAME:
+        return 0
+    bash_args = event.get("tool_input") or {}
+    cmd = bash_args.get("command", "") if isinstance(bash_args, dict) else ""
+    if isinstance(cmd, str) and _is_git_write_op(cmd):
+        envelope = {
+            "hookSpecificOutput": {
+                "hookEventName": "PostToolUse",
+                "additionalContext": REMINDER_TEXT,
+            }
+        }
+        json.dump(envelope, sys.stdout)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/hooks/post_preflight_capture_reminder.py b/scripts/hooks/post_preflight_capture_reminder.py
new file mode 100644
index 00000000..c2669274
--- /dev/null
+++ b/scripts/hooks/post_preflight_capture_reminder.py
@@ -0,0 +1,149 @@
+"""PostToolUse hook for ``bicameral.preflight``.
+
+When preflight surfaces ≥1 decision, inject a system-reminder templating
+the correction-capture loop (Step 5.6 of ``skills/bicameral-preflight``).
+Per #175, the agent does NOT judge contradiction itself — instead it
+asks the user via ``AskUserQuestion`` (Step 5.6.1) and acts mechanically
+on the answer (Step 5.6.2):
+
+  1. ``AskUserQuestion`` — disambiguate whether the current request is
+     a refinement of any surfaced decision. Three options: supersede,
+     keep_both, unrelated.
+  2. If 'supersede' or 'keep_both':
+     - ``bicameral.ingest(source="agent_session", ...)``
+     - ``bicameral.resolve_collision(new_id=..., old_id=..., action=...)``
+  3. If 'unrelated': skip capture, proceed to implementation.
+
+Why route the judgment to the user (path D in the #175 design discussion):
+prior implementations tried (a) a conditional "IF you contradict ..." gate
+which let the agent skip on borderline prompts, then (b) an unconditional
+"you MUST capture" gate which the agent still ignored on structural-
+mismatch prompts (e.g. "add programmatic API" vs "drag-and-drop UX"
+decision — agent rationalized "these can coexist" and skipped). The
+contradiction judgment is semantic, not lexical, and LLM-level inference
+is unreliable on it. The user is the only party with the actual intent;
+the skill puts the question to them.
+
+Trust contract preserved: the hook only fires when ``fired=True``
+AND ``len(decisions) > 0`` — silent on no signal. The question runs at
+a moment the flow is already paused (rendering the surfaced block).
+
+Per Claude Code 2.x hook contract: read JSON ``{tool_name, tool_input,
+tool_response}`` from stdin. ``tool_response`` is either a JSON string or
+a dict — both are handled. Output is the structured envelope
+``{"hookSpecificOutput": {"hookEventName": "PostToolUse",
+"additionalContext": "..."}}`` written to stdout; the CLI surfaces
+``additionalContext`` next to the tool result the model sees on the next
+turn. Plain stdout is silently dropped to the debug log for PostToolUse
+events (per https://code.claude.com/docs/en/hooks — only
+UserPromptSubmit / UserPromptExpansion / SessionStart treat raw stdout
+as agent-visible context). Errors swallowed silently (exit 0, empty
+response) so a broken hook never blocks a user.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+
+PREFLIGHT_TOOL_NAME = "mcp__bicameral__bicameral_preflight"
+
+
+def _coerce_response(raw: object) -> dict:
+    """Normalize ``tool_response`` into a dict.
+
+    Dicts pass through untouched; strings are parsed as JSON and kept only
+    when they decode to an object. Everything else yields ``{}``.
+    """
+    if isinstance(raw, str):
+        try:
+            raw = json.loads(raw)
+        except ValueError:  # covers json.JSONDecodeError
+            return {}
+    if isinstance(raw, dict):
+        return raw
+    return {}
+
+
+def _format_reminder(decisions: list[dict]) -> str:
+    """Render the capture-loop reminder; the question template quotes the first decision."""
+    bullets = "\n".join(
+        f"  - {d.get('decision_id', '<unknown>')}: {d.get('description', '<no description>')}"
+        for d in decisions
+    )
+    first_id = decisions[0].get("decision_id", "<decision_id>")
+    first_desc = str(decisions[0].get("description") or "<description>")  # keeps [:100] safe
+    return (
+        "<system-reminder>\n"
+        f"bicameral.preflight surfaced {len(decisions)} prior decision(s):\n"
+        f"{bullets}\n"
+        "\n"
+        "BEFORE any code edits, do NOT judge contradiction yourself — ask "
+        "the user. Per skills/bicameral-preflight Step 5.6.1, call "
+        "AskUserQuestion to disambiguate whether the current request is a "
+        "refinement of any surfaced decision. The user (not the agent) "
+        "decides; the agent then acts mechanically on the answer.\n"
+        "\n"
+        "AskUserQuestion({\n"
+        '  "question": "Your request appears to operate on the same feature '
+        f"surface as surfaced decision {first_id} "
+        f'(\\"{first_desc[:100]}\\"). Treat this work as a refinement of that prior plan?",'
+        "\n"
+        '  "multiSelect": False,\n'
+        '  "options": [\n'
+        '    {"label": "Yes — supersede prior plan",\n'
+        '     "description": "<paraphrase user\'s direction; replaces the prior wholesale>"},\n'
+        '    {"label": "Yes — keep both (addition or scoping)",\n'
+        '     "description": "<paraphrase; adds to or narrows; both remain>"},\n'
+        '    {"label": "No — unrelated to prior plan",\n'
+        '     "description": "Continue without capture"},\n'
+        "  ],\n"
+        "})\n"
+        "\n"
+        "Branch on the answer:\n"
+        "  - 'supersede'   → bicameral.ingest(source='agent_session') +\n"
+        "                    bicameral.resolve_collision(action='supersede')\n"
+        "  - 'keep both'   → bicameral.ingest(source='agent_session') +\n"
+        "                    bicameral.resolve_collision(action='keep_both')\n"
+        "  - 'unrelated'   → skip capture; proceed to implementation; narrate one\n"
+        "                    line ('noted — surfaced context isn't applicable here').\n"
+        "\n"
+        "If multiple decisions were surfaced and the user's request plausibly\n"
+        "touches more than one, ask once per plausibly-touched decision; skip\n"
+        "the question for surfaced decisions clearly unrelated to the prompt.\n"
+        "\n"
+        "Asking is cheap; missed capture is irreversible context loss. Default\n"
+        "to asking; only skip when the surfaced decision is obviously off-topic.\n"
+        "</system-reminder>"
+    )
+
+
+def main() -> int:
+    """Emit the capture reminder when preflight fired with ≥1 decision; returns 0 otherwise too."""
+    try:
+        payload = json.load(sys.stdin)
+    except (json.JSONDecodeError, ValueError):
+        return 0
+    if not isinstance(payload, dict) or payload.get("tool_name") != PREFLIGHT_TOOL_NAME:
+        return 0
+    response = _coerce_response(payload.get("tool_response"))
+    surfaced = response.get("decisions") if response.get("fired") else None
+    # Keep only dict entries: _format_reminder calls .get() on each one, so a
+    # malformed element would otherwise surface as a traceback instead of silence.
+    decisions = [d for d in surfaced if isinstance(d, dict)] if isinstance(surfaced, list) else []
+    if not decisions:
+        return 0
+    json.dump(
+        {
+            "hookSpecificOutput": {
+                "hookEventName": "PostToolUse",
+                "additionalContext": _format_reminder(decisions),
+            }
+        },
+        sys.stdout,
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/hooks/preflight_intent.py b/scripts/hooks/preflight_intent.py
new file mode 100644
index 00000000..5910dd0a
--- /dev/null
+++ b/scripts/hooks/preflight_intent.py
@@ -0,0 +1,77 @@
+"""Preflight intent classifier.
+
+Single source of truth for the verb list used by the bicameral-preflight
+SKILL.md description and the UserPromptSubmit hook. Deterministic: no
+LLM, no network, no I/O beyond a string scan.
+"""
+
+from __future__ import annotations
+
+import re
+
+IMPLEMENTATION_VERBS: frozenset[str] = frozenset(
+    {
+        "add",
+        "build",
+        "create",
+        "implement",
+        "modify",
+        "refactor",
+        "update",
+        "fix",
+        "change",
+        "write",
+        "edit",
+        "move",
+        "rename",
+        "remove",
+        "delete",
+        "extract",
+        "convert",
+        "integrate",
+        "deploy",
+        "ship",
+        "configure",
+        "connect",
+        "extend",
+        "migrate",
+        "wire",
+        "hook up",
+        "set up",
+        "complete",
+        "finish",
+        "continue",
+    }
+)
+
+INDIRECT_INTENT_PHRASES: tuple[str, ...] = (
+    "how should i implement",
+    "how do i build",
+    "how should i write",
+    "what's the best way to add",
+    "what's the cleanest way to refactor",
+)
+
+SKIP_PATTERNS: tuple[re.Pattern[str], ...] = (
+    re.compile(r"\bfix\b.*\btypo\b", re.IGNORECASE),
+    re.compile(r"\bbump\b.*\b(?:to|from)\b.*\d+\.\d+", re.IGNORECASE),
+    re.compile(r"\bhow does\b", re.IGNORECASE),
+)
+
+_VERB_REGEX = re.compile(
+    r"\b(?:" + "|".join(re.escape(v) for v in IMPLEMENTATION_VERBS) + r")\b",
+    re.IGNORECASE,
+)
+
+
+def should_fire_preflight(prompt: str) -> bool:
+    """Decide whether ``prompt`` reads as a request to change code.
+
+    Skip patterns veto first; then a verb or an indirect phrase fires.
+    """
+    if not prompt or not prompt.strip():
+        return False
+    if any(pattern.search(prompt) for pattern in SKIP_PATTERNS):
+        return False
+    text = prompt.lower()
+    return bool(_VERB_REGEX.search(prompt)) or any(p in text for p in INDIRECT_INTENT_PHRASES)
diff --git a/scripts/hooks/preflight_reminder.py b/scripts/hooks/preflight_reminder.py
new file mode 100644
index 00000000..7ea081b2
--- /dev/null
+++ b/scripts/hooks/preflight_reminder.py
@@ -0,0 +1,93 @@
+"""UserPromptSubmit hook for Claude Code.
+
+When the user prompt indicates code-implementation intent, inject a
+system-reminder elevating bicameral.preflight above the agent's default
+tool-selection priority — but only as a write-op gate, not a discovery
+gate.
+
+Reconciles with #146: that issue's failure mode was the agent doing
+file inspection and then NEVER calling preflight at all. The original
+fix (#155) over-corrected by telling the agent to call preflight
+"before any file-inspection tool". That short-circuited the caller-LLM
+discovery the rest of the contract depends on — preflight needs
+``file_paths`` populated to do region-anchored retrieval, and the agent
+can't extract file paths if we forbid it from looking at the codebase
+first.
+
+Updated contract:
+  - Read / Grep / Glob FIRST — caller LLM resolves "the reorder feature"
+    to concrete file paths.
+  - bicameral.preflight(topic, file_paths) — fed by step 1's discovery,
+    so the server gets the deterministic binds_to lookup it needs.
+  - Write ops (Edit / Write / NotebookEdit / mutating Bash) — preflight
+    must precede the first one. This is the line that #146 was
+    actually defending; the asserter (assert_flow_2) gates exactly this.
+
+Per Claude Code 2.x hook contract: read JSON from stdin, write JSON to
+stdout shaped as ``{"hookSpecificOutput": {"hookEventName":
+"UserPromptSubmit", "additionalContext": "..."}}``. The legacy top-level
+``{"additionalContext": ...}`` shape is silently ignored by the CLI —
+the hook still runs and exits 0, but the context never reaches the
+model. Errors are swallowed silently (exit 0, empty response) so a
+broken hook never blocks a user.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+from hooks.preflight_intent import should_fire_preflight  # noqa: E402
+
+REMINDER_TEXT = (
+    "<system-reminder>\n"
+    "This prompt indicates code-implementation intent. Auto-fire is the "
+    "contract: `bicameral.preflight` MUST run BEFORE any write op (Edit, "
+    "Write, NotebookEdit, or Bash that mutates state — git commit, file "
+    "writes, mv/rm/etc.).\n"
+    "\n"
+    "Read-only discovery FIRST, then preflight, then writes.\n"
+    "\n"
+    "Recommended sequence:\n"
+    "  1. Use Read / Grep / Glob to map the user's request to concrete "
+    "file paths. The user often names a feature ('the reorder feature') "
+    "rather than a file (`reorder.ts`); resolve that mapping yourself "
+    "before calling preflight.\n"
+    "  2. Call `bicameral.preflight(topic, file_paths)` with BOTH a "
+    "natural-language topic AND the concrete file paths discovered in "
+    "step 1. `file_paths=[]` defeats region-anchored retrieval — the "
+    "server uses these to look up bound decisions deterministically; "
+    "topic alone falls back to fuzzy text similarity.\n"
+    "  3. Read the surfaced decisions / drifted regions / open questions, "
+    "then proceed with the implementation.\n"
+    "\n"
+    "The skill produces zero output when nothing relevant is found, so "
+    "the cost of firing is bounded. Skipping preflight is the contract "
+    "violation, not running discovery first.\n"
+    "</system-reminder>"
+)
+
+
+def main() -> int:
+    """Emit the preflight reminder envelope for implementation-intent prompts; returns 0."""
+    try:
+        payload = json.load(sys.stdin)
+    except (json.JSONDecodeError, ValueError):
+        return 0
+    prompt = payload.get("prompt", "") if isinstance(payload, dict) else ""
+    # A non-string prompt (e.g. a number) would raise inside the classifier's .strip().
+    if isinstance(prompt, str) and should_fire_preflight(prompt):
+        envelope = {
+            "hookSpecificOutput": {
+                "hookEventName": "UserPromptSubmit",
+                "additionalContext": REMINDER_TEXT,
+            }
+        }
+        json.dump(envelope, sys.stdout)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/sim_accountable.py b/scripts/sim_accountable.py
index 6ffd401e..d95fe8fd 100644
--- a/scripts/sim_accountable.py
+++ b/scripts/sim_accountable.py
@@ -11,45 +11,58 @@
   Run 7  — Search in surrealkv:// persistent mode (fix 3 verification)
   Run 8  — pending_compliance_checks → resolve_compliance → reflected status (v0.9.3 skill gap fix)
 """
-import sys, asyncio, os, tempfile, shutil, pathlib
-sys.path.insert(0, '/Users/jinhongkuan/github/bicameral/pilot/mcp')
 
-REPO = '/Users/jinhongkuan/github/Accountable-App-3.0'
-os.environ['SURREAL_URL'] = 'memory://'
-os.environ['REPO_PATH'] = REPO
+import asyncio
+import os
+import pathlib
+import shutil
+import sys
+import tempfile
+
+sys.path.insert(0, "/Users/jinhongkuan/github/bicameral/pilot/mcp")
+
+REPO = "/Users/jinhongkuan/github/Accountable-App-3.0"
+os.environ["SURREAL_URL"] = "memory://"
+os.environ["REPO_PATH"] = REPO
 
 RESULTS = []
 
+
 def section(title, body):
     RESULTS.append(f"\n## {title}\n\n{body.rstrip()}\n")
-    preview = body[:120].replace('\n', ' ')
+    preview = body[:120].replace("\n", " ")
     print(f"[{title}]", preview)
 
 
 def make_fresh_ledger():
-    import importlib, adapters.ledger as _al
+    import importlib
+
+    import adapters.ledger as _al
+
     importlib.reload(_al)
     return _al.get_ledger()
 
 
 async def make_ctx(repo_path=None, surreal_url=None):
     if surreal_url:
-        os.environ['SURREAL_URL'] = surreal_url
+        os.environ["SURREAL_URL"] = surreal_url
     if repo_path:
-        os.environ['REPO_PATH'] = repo_path
+        os.environ["REPO_PATH"] = repo_path
     from adapters.code_locator import get_code_locator
+
     ledger = make_fresh_ledger()
     await ledger.connect()
     code_graph = get_code_locator()
 
     class Ctx:
         pass
+
     ctx = Ctx()
     ctx.repo_path = repo_path or REPO
-    ctx.session_id = 'sim-accountable-v2'
-    ctx.authoritative_ref = 'main'
-    ctx.authoritative_sha = ''
-    ctx.head_sha = ''
+    ctx.session_id = "sim-accountable-v2"
+    ctx.authoritative_ref = "main"
+    ctx.authoritative_sha = ""
+    ctx.head_sha = ""
     ctx.drift_analyzer = None
     ctx._sync_state = {}
     ctx.ledger = ledger
@@ -58,24 +71,70 @@ class Ctx:
 
 
 SLACK_DECISIONS = [
-    {"description": "All code changes must go to staging first via PR targeting staging branch — Ian cannot merge direct to main", "feature_group": "Dev Process", "decision_level": "L1"},
-    {"description": "Staging environment mirrors prod with real integrations (except SMS and Zoom) and must stay in sync with main", "feature_group": "Dev Process", "decision_level": "L2"},
-    {"description": "Brian Borg acts as engineering quarterback and coordinator — all PRs assigned to Brian before going to prod", "feature_group": "Dev Process", "decision_level": "L1"},
-    {"description": "All high-value secrets live in Supabase secrets — not in Vercel env vars", "feature_group": "Security", "decision_level": "L2"},
-    {"description": "Sentry auth token must be rotated and marked Sensitive in Vercel after Vercel breach exposed unprotected env vars", "feature_group": "Security", "decision_level": "L1"},
-    {"description": "Assess Sentry vs PostHog — PostHog now captures ~80% of Sentry value; evaluate eliminating redundant tool", "feature_group": "Observability", "decision_level": "L2"},
-    {"description": "Individual coaching portal for 1:1 clients to manage engagements, see recording transcripts, insights and trends", "feature_group": "Coaching Portal", "decision_level": "L1"},
-    {"description": "Weekly workshop module should be a repeatable component — AI agent populates it and creates a new record each week rather than generating new code", "feature_group": "Weekly Workshop", "decision_level": "L2"},
-    {"description": "Users can view their daily check-in completion history and trend data in the Accountable platform", "feature_group": "Daily Check-in", "decision_level": "L1"},
-    {"description": "Claude reasoning level should be task-appropriate — start at lower reasoning with escalation tiers rather than always using maximum reasoning", "feature_group": "AI Coach", "decision_level": "L2"},
-    {"description": "Weekly community bulletin delivered as a dynamic page — email directs users there rather than embedding full content to protect deliverability", "feature_group": "Email / Comms", "decision_level": "L2"},
+    {
+        "description": "All code changes must go to staging first via PR targeting staging branch — Ian cannot merge direct to main",
+        "feature_group": "Dev Process",
+        "decision_level": "L1",
+    },
+    {
+        "description": "Staging environment mirrors prod with real integrations (except SMS and Zoom) and must stay in sync with main",
+        "feature_group": "Dev Process",
+        "decision_level": "L2",
+    },
+    {
+        "description": "Brian Borg acts as engineering quarterback and coordinator — all PRs assigned to Brian before going to prod",
+        "feature_group": "Dev Process",
+        "decision_level": "L1",
+    },
+    {
+        "description": "All high-value secrets live in Supabase secrets — not in Vercel env vars",
+        "feature_group": "Security",
+        "decision_level": "L2",
+    },
+    {
+        "description": "Sentry auth token must be rotated and marked Sensitive in Vercel after Vercel breach exposed unprotected env vars",
+        "feature_group": "Security",
+        "decision_level": "L1",
+    },
+    {
+        "description": "Assess Sentry vs PostHog — PostHog now captures ~80% of Sentry value; evaluate eliminating redundant tool",
+        "feature_group": "Observability",
+        "decision_level": "L2",
+    },
+    {
+        "description": "Individual coaching portal for 1:1 clients to manage engagements, see recording transcripts, insights and trends",
+        "feature_group": "Coaching Portal",
+        "decision_level": "L1",
+    },
+    {
+        "description": "Weekly workshop module should be a repeatable component — AI agent populates it and creates a new record each week rather than generating new code",
+        "feature_group": "Weekly Workshop",
+        "decision_level": "L2",
+    },
+    {
+        "description": "Users can view their daily check-in completion history and trend data in the Accountable platform",
+        "feature_group": "Daily Check-in",
+        "decision_level": "L1",
+    },
+    {
+        "description": "Claude reasoning level should be task-appropriate — start at lower reasoning with escalation tiers rather than always using maximum reasoning",
+        "feature_group": "AI Coach",
+        "decision_level": "L2",
+    },
+    {
+        "description": "Weekly community bulletin delivered as a dynamic page — email directs users there rather than embedding full content to protect deliverability",
+        "feature_group": "Email / Comms",
+        "decision_level": "L2",
+    },
 ]
 
 
 # ── Run 1: Ingest ────────────────────────────────────────────────────────────
 
+
 async def run_ingest(ctx):
     from handlers.ingest import handle_ingest
+
     mappings = [
         {
             "intent": d["description"],
@@ -91,11 +150,14 @@ async def run_ingest(ctx):
         }
         for d in SLACK_DECISIONS
     ]
-    result = await handle_ingest(ctx, {
-        "repo": REPO,
-        "query": "Accountable platform decisions from #accountable-tech",
-        "mappings": mappings,
-    })
+    result = await handle_ingest(
+        ctx,
+        {
+            "repo": REPO,
+            "query": "Accountable platform decisions from #accountable-tech",
+            "mappings": mappings,
+        },
+    )
 
     created = result.created_decisions
     body = (
@@ -106,9 +168,11 @@ async def run_ingest(ctx):
         "Entries:\n"
     )
     for d in created:
-        body += f"  [{d.decision_level or '?'}] {d.decision_id}  \"{d.description[:58]}...\"\n"
+        body += f'  [{d.decision_level or "?"}] {d.decision_id}  "{d.description[:58]}..."\n'
 
-    l1_in_pending = [d for d in result.pending_grounding_decisions if d.get("decision_level") == "L1"]
+    l1_in_pending = [
+        d for d in result.pending_grounding_decisions if d.get("decision_level") == "L1"
+    ]
     body += (
         f"\nL1 filter: pending_grounding_decisions has "
         f"{len(result.pending_grounding_decisions)} entries, "
@@ -120,20 +184,26 @@ async def run_ingest(ctx):
 
 # ── Run 2: Preflight regression ──────────────────────────────────────────────
 
+
 async def run_preflight_quick(ctx):
     from handlers.preflight import handle_preflight
+
     r = await handle_preflight(ctx, topic="weekly workshop module repeatable component")
-    fired = getattr(r, 'fired', False)
-    count = len(getattr(r, 'decisions', []) or [])
+    fired = getattr(r, "fired", False)
+    count = len(getattr(r, "decisions", []) or [])
     body = f"Topic: 'weekly workshop module repeatable component'\nFired: {fired}, decisions surfaced: {count}\n"
-    body += "Result: " + ("PASS — preflight regression clean\n" if fired and count >= 1 else "FAIL\n")
+    body += "Result: " + (
+        "PASS — preflight regression clean\n" if fired and count >= 1 else "FAIL\n"
+    )
     section("Run 2 — Preflight regression", body)
 
 
 # ── Run 3: History + fix-2 verification ─────────────────────────────────────
 
+
 async def run_history_verify(ctx):
     from handlers.history import handle_history
+
     result = await handle_history(ctx)
     features = result.features or []
 
@@ -141,18 +211,18 @@ async def run_history_verify(ctx):
     name_ok = True
     level_ok = False
     for fg in features:
-        name = fg.name      # correct attr (was fg.feature_group in v1 sim → showed '?')
+        name = fg.name  # correct attr (was fg.feature_group in v1 sim → showed '?')
         decisions = fg.decisions or []
         body += f"  [{name}] — {len(decisions)} decision(s)\n"
-        if not name or name == '?':
+        if not name or name == "?":
             name_ok = False
         for d in decisions[:2]:
-            lvl = d.decision_level   # new field — was absent from HistoryDecision in v1 sim
+            lvl = d.decision_level  # new field — was absent from HistoryDecision in v1 sim
             body += f"    [{lvl or 'None'}|{d.status}] {d.summary[:65]}\n"
             if lvl is not None:
                 level_ok = True
 
-    body += f"\nFix 2 verdict:\n"
+    body += "\nFix 2 verdict:\n"
     body += f"  fg.name populated: {name_ok} (was '?' in v1 — fixed)\n"
     body += f"  d.decision_level populated: {level_ok} (was absent in v1 — fixed)\n"
     section("Run 3 — History + fix-2 verification (HistoryDecision.decision_level)", body)
@@ -160,6 +230,7 @@ async def run_history_verify(ctx):
 
 # ── Run 4: Bind L2 decisions to Accountable code ────────────────────────────
 
+
 async def run_bind_accountable(ctx, ingest_result):
     from handlers.bind import handle_bind
 
@@ -168,7 +239,10 @@ async def run_bind_accountable(ctx, ingest_result):
     ai_coach_id = next((v for k, v in id_by_desc.items() if "reasoning level" in k.lower()), None)
 
     if not weekly_id or not ai_coach_id:
-        section("Run 4 — Bind L2 decisions to Accountable code", "ERROR: target IDs not found in created_decisions")
+        section(
+            "Run 4 — Bind L2 decisions to Accountable code",
+            "ERROR: target IDs not found in created_decisions",
+        )
         return None
 
     bindings = [
@@ -212,12 +286,14 @@ async def run_bind_accountable(ctx, ingest_result):
 
 # ── Run 5: Drift check post-bind (should be clean) ──────────────────────────
 
+
 async def run_drift_post_bind(ctx):
     from handlers.detect_drift import handle_detect_drift
+
     target = "supabase/functions/generate-weekly-ai-insights/index.ts"
     result = await handle_detect_drift(ctx, file_path=target)
-    drifted = getattr(result, 'drifted', []) or []
-    reflected = getattr(result, 'reflected', []) or []
+    drifted = getattr(result, "drifted", []) or []
+    reflected = getattr(result, "reflected", []) or []
     body = (
         f"File: {target}\n"
         f"Drifted: {len(drifted)}, Reflected: {len(reflected)}\n"
@@ -260,21 +336,34 @@ def apply_tier_bonus(base: float, tier: str) -> float:
 async def run_full_drift_loop():
     """Follow-up 4: ingest → bind → modify file → detect drift."""
     import subprocess
-    tmpdir = tempfile.mkdtemp(prefix='bicam_drift_test_')
+
+    tmpdir = tempfile.mkdtemp(prefix="bicam_drift_test_")
     try:
         # Bootstrap a real git repo so compute_content_hash works
-        subprocess.run(['git', 'init', '-b', 'main'], cwd=tmpdir, check=True, capture_output=True)
-        subprocess.run(['git', 'config', 'user.email', 'test@test.com'], cwd=tmpdir, check=True, capture_output=True)
-        subprocess.run(['git', 'config', 'user.name', 'Test'], cwd=tmpdir, check=True, capture_output=True)
+        subprocess.run(["git", "init", "-b", "main"], cwd=tmpdir, check=True, capture_output=True)
+        subprocess.run(
+            ["git", "config", "user.email", "test@test.com"],
+            cwd=tmpdir,
+            check=True,
+            capture_output=True,
+        )
+        subprocess.run(
+            ["git", "config", "user.name", "Test"], cwd=tmpdir, check=True, capture_output=True
+        )
 
         # Write and commit initial version
         test_file = pathlib.Path(tmpdir) / "discount.py"
         test_file.write_text(TEMP_FILE_CONTENT_V1)
-        subprocess.run(['git', 'add', 'discount.py'], cwd=tmpdir, check=True, capture_output=True)
-        subprocess.run(['git', 'commit', '-m', 'initial: 10% discount on $100+'], cwd=tmpdir, check=True, capture_output=True)
+        subprocess.run(["git", "add", "discount.py"], cwd=tmpdir, check=True, capture_output=True)
+        subprocess.run(
+            ["git", "commit", "-m", "initial: 10% discount on $100+"],
+            cwd=tmpdir,
+            check=True,
+            capture_output=True,
+        )
 
-        os.environ['SURREAL_URL'] = 'memory://'
-        os.environ['REPO_PATH'] = tmpdir
+        os.environ["SURREAL_URL"] = "memory://"
+        os.environ["REPO_PATH"] = tmpdir
 
         ledger = make_fresh_ledger()
         await ledger.connect()
@@ -283,12 +372,13 @@ async def run_full_drift_loop():
 
         class Ctx:
             pass
+
         ctx = Ctx()
         ctx.repo_path = tmpdir
-        ctx.session_id = 'sim-drift-loop'
-        ctx.authoritative_ref = 'main'
-        ctx.authoritative_sha = ''
-        ctx.head_sha = ''
+        ctx.session_id = "sim-drift-loop"
+        ctx.authoritative_ref = "main"
+        ctx.authoritative_sha = ""
+        ctx.head_sha = ""
         ctx.drift_analyzer = None
         ctx._sync_state = {}
         ctx.ledger = ledger
@@ -296,64 +386,78 @@ class Ctx:
 
         # Step 1: ingest a decision about the discount logic
         from handlers.ingest import handle_ingest
-        ingest_result = await handle_ingest(ctx, {
-            "repo": tmpdir,
-            "query": "discount policy decision",
-            "mappings": [{
-                "intent": "Apply 10% discount on orders over $100",
-                "feature_group": "Pricing",
-                "decision_level": "L2",
-                "span": {
-                    "text": "Apply 10% discount on orders over $100",
-                    "source_type": "slack",
-                    "source_ref": "eng-discussion",
-                    "meeting_date": "2026-04-26",
-                    "speakers": ["Jin"],
-                },
-            }],
-        })
+
+        ingest_result = await handle_ingest(
+            ctx,
+            {
+                "repo": tmpdir,
+                "query": "discount policy decision",
+                "mappings": [
+                    {
+                        "intent": "Apply 10% discount on orders over $100",
+                        "feature_group": "Pricing",
+                        "decision_level": "L2",
+                        "span": {
+                            "text": "Apply 10% discount on orders over $100",
+                            "source_type": "slack",
+                            "source_ref": "eng-discussion",
+                            "meeting_date": "2026-04-26",
+                            "speakers": ["Jin"],
+                        },
+                    }
+                ],
+            },
+        )
         decision_id = ingest_result.created_decisions[0].decision_id
 
         # Step 2: bind to the file at its current state
         from handlers.bind import handle_bind
-        bind_result = await handle_bind(ctx, bindings=[{
-            "decision_id": decision_id,
-            "file_path": "discount.py",
-            "symbol_name": "calculate_discount",
-            "start_line": 1,
-            "end_line": 5,
-            "purpose": "Discount calculation — 10% on orders over $100",
-        }])
+
+        bind_result = await handle_bind(
+            ctx,
+            bindings=[
+                {
+                    "decision_id": decision_id,
+                    "file_path": "discount.py",
+                    "symbol_name": "calculate_discount",
+                    "start_line": 1,
+                    "end_line": 5,
+                    "purpose": "Discount calculation — 10% on orders over $100",
+                }
+            ],
+        )
         bind_ok = bind_result.bindings and not bind_result.bindings[0].error
         initial_hash = bind_result.bindings[0].content_hash if bind_ok else "?"
 
         region_id = bind_result.bindings[0].region_id
 
         # Step 3: snapshot the stored hash before modification
-        pre_hash_row = await ledger._client.query(
-            f"SELECT content_hash FROM {region_id} LIMIT 1"
-        )
+        pre_hash_row = await ledger._client.query(f"SELECT content_hash FROM {region_id} LIMIT 1")
         pre_hash = (pre_hash_row[0].get("content_hash") or "") if pre_hash_row else ""
 
         # Step 3b: check drift status — should be pending (V1: no compliance verdict yet)
         from handlers.detect_drift import handle_detect_drift
+
         pre_result = await handle_detect_drift(ctx, file_path="discount.py")
-        pre_pending = len(getattr(pre_result, 'pending', []) or [])
+        pre_pending = len(getattr(pre_result, "pending", []) or [])
 
         # Step 4: modify the file and commit (threshold and rate changed)
         test_file.write_text(TEMP_FILE_CONTENT_V2)
-        subprocess.run(['git', 'add', 'discount.py'], cwd=tmpdir, check=True, capture_output=True)
-        subprocess.run(['git', 'commit', '-m', 'change: 15% discount on $50+'], cwd=tmpdir, check=True, capture_output=True)
+        subprocess.run(["git", "add", "discount.py"], cwd=tmpdir, check=True, capture_output=True)
+        subprocess.run(
+            ["git", "commit", "-m", "change: 15% discount on $50+"],
+            cwd=tmpdir,
+            check=True,
+            capture_output=True,
+        )
 
         # Step 5: run detect_drift — triggers link_commit which re-hashes the file
         post_result = await handle_detect_drift(ctx, file_path="discount.py")
-        post_drifted = getattr(post_result, 'drifted', []) or []
-        post_pending = getattr(post_result, 'pending', []) or []
+        post_drifted = getattr(post_result, "drifted", []) or []
+        post_pending = getattr(post_result, "pending", []) or []
 
         # Step 5b: confirm the stored hash updated to reflect the new content
-        post_hash_row = await ledger._client.query(
-            f"SELECT content_hash FROM {region_id} LIMIT 1"
-        )
+        post_hash_row = await ledger._client.query(f"SELECT content_hash FROM {region_id} LIMIT 1")
         post_hash = (post_hash_row[0].get("content_hash") or "") if post_hash_row else ""
         hash_changed = pre_hash != post_hash and bool(post_hash)
 
@@ -385,66 +489,80 @@ class Ctx:
 
     finally:
         shutil.rmtree(tmpdir, ignore_errors=True)
-        os.environ['SURREAL_URL'] = 'memory://'
-        os.environ['REPO_PATH'] = REPO
+        os.environ["SURREAL_URL"] = "memory://"
+        os.environ["REPO_PATH"] = REPO
 
     section("Run 6 — Full ingest→bind→modify→drift loop (follow-up 4)", body)
 
 
 # ── Run 7: Search in surrealkv:// persistent mode ───────────────────────────
 
+
 async def run_search_persistent():
-    tmpdir = tempfile.mkdtemp(prefix='bicam_search_test_')
+    tmpdir = tempfile.mkdtemp(prefix="bicam_search_test_")
     try:
-        db_url = f'surrealkv://{tmpdir}/test.db'
-        os.environ['SURREAL_URL'] = db_url
-        os.environ['REPO_PATH'] = REPO
+        db_url = f"surrealkv://{tmpdir}/test.db"
+        os.environ["SURREAL_URL"] = db_url
+        os.environ["REPO_PATH"] = REPO
 
         ledger = make_fresh_ledger()
         await ledger.connect()
 
         from ledger.queries import upsert_decision
+
         client = ledger._client
 
         test_decisions = [
-            ("Coaching portal enables 1:1 client engagement visibility with transcripts", "Coaching Portal"),
-            ("Weekly workshop creates a new repeatable record each week via AI agent", "Weekly Workshop"),
+            (
+                "Coaching portal enables 1:1 client engagement visibility with transcripts",
+                "Coaching Portal",
+            ),
+            (
+                "Weekly workshop creates a new repeatable record each week via AI agent",
+                "Weekly Workshop",
+            ),
             ("Sentry token must be rotated after Vercel breach exposed env vars", "Security"),
         ]
         for desc, fg in test_decisions:
             await upsert_decision(
-                client, description=desc, source_type="slack",
-                source_ref="accountable-tech", status="ungrounded", feature_group=fg,
+                client,
+                description=desc,
+                source_type="slack",
+                source_ref="accountable-tech",
+                status="ungrounded",
+                feature_group=fg,
             )
 
         await asyncio.sleep(0.3)  # let FTS index settle
 
         class Ctx2:
             pass
+
         ctx2 = Ctx2()
         ctx2.repo_path = REPO
-        ctx2.session_id = 'sim-search'
-        ctx2.authoritative_ref = 'main'
-        ctx2.authoritative_sha = ''
-        ctx2.head_sha = ''
+        ctx2.session_id = "sim-search"
+        ctx2.authoritative_ref = "main"
+        ctx2.authoritative_sha = ""
+        ctx2.head_sha = ""
         ctx2.drift_analyzer = None
         ctx2._sync_state = {}
         ctx2.ledger = ledger
         ctx2.code_graph = None
 
         from handlers.search_decisions import handle_search_decisions
+
         queries = ["coaching portal", "weekly workshop", "Sentry breach"]
         results_map = {}
         for q in queries:
             r = await handle_search_decisions(ctx2, query=q)
-            results_map[q] = getattr(r, 'decisions', []) or []
+            results_map[q] = getattr(r, "decisions", []) or []
 
         total_matches = sum(len(v) for v in results_map.values())
-        body = f"DB: surrealkv:// (persistent, temp path)\nIngested 3 decisions, ran 3 queries.\n\n"
+        body = "DB: surrealkv:// (persistent, temp path)\nIngested 3 decisions, ran 3 queries.\n\n"
         for q, matches in results_map.items():
             body += f"Query: '{q}'\n  Matches: {len(matches)}\n"
             for d in matches[:2]:
-                body += f"    - {getattr(d,'description','')[:70]}\n"
+                body += f"    - {getattr(d, 'description', '')[:70]}\n"
 
         if total_matches == 0:
             body += (
@@ -460,14 +578,15 @@ class Ctx2:
 
     finally:
         shutil.rmtree(tmpdir, ignore_errors=True)
-        os.environ['SURREAL_URL'] = 'memory://'
-        os.environ['REPO_PATH'] = REPO
+        os.environ["SURREAL_URL"] = "memory://"
+        os.environ["REPO_PATH"] = REPO
 
     section("Run 7 — Search in surrealkv:// persistent mode (fix 3 verification)", body)
 
 
 # ── Run 8: pending_compliance_checks → resolve_compliance → reflected ────────
 
+
 async def run_compliance_resolution_loop():
     """
     Verify the V1 path to 'reflected' status:
@@ -477,24 +596,37 @@ async def run_compliance_resolution_loop():
     This is the exact flow the updated scan-branch / drift skills now prescribe.
     """
     import subprocess
-    tmpdir = tempfile.mkdtemp(prefix='bicam_compliance_test_')
+
+    tmpdir = tempfile.mkdtemp(prefix="bicam_compliance_test_")
     try:
-        subprocess.run(['git', 'init', '-b', 'main'], cwd=tmpdir, check=True, capture_output=True)
-        subprocess.run(['git', 'config', 'user.email', 'test@test.com'], cwd=tmpdir, check=True, capture_output=True)
-        subprocess.run(['git', 'config', 'user.name', 'Test'], cwd=tmpdir, check=True, capture_output=True)
+        subprocess.run(["git", "init", "-b", "main"], cwd=tmpdir, check=True, capture_output=True)
+        subprocess.run(
+            ["git", "config", "user.email", "test@test.com"],
+            cwd=tmpdir,
+            check=True,
+            capture_output=True,
+        )
+        subprocess.run(
+            ["git", "config", "user.name", "Test"], cwd=tmpdir, check=True, capture_output=True
+        )
 
         test_file = pathlib.Path(tmpdir) / "auth.py"
         test_file.write_text(
-            'def require_auth(request):\n'
+            "def require_auth(request):\n"
             '    """Reject unauthenticated requests with 401."""\n'
             '    if not request.get("token"):\n'
             '        raise PermissionError("401 Unauthorized")\n'
         )
-        subprocess.run(['git', 'add', 'auth.py'], cwd=tmpdir, check=True, capture_output=True)
-        subprocess.run(['git', 'commit', '-m', 'initial: auth gate'], cwd=tmpdir, check=True, capture_output=True)
+        subprocess.run(["git", "add", "auth.py"], cwd=tmpdir, check=True, capture_output=True)
+        subprocess.run(
+            ["git", "commit", "-m", "initial: auth gate"],
+            cwd=tmpdir,
+            check=True,
+            capture_output=True,
+        )
 
-        os.environ['SURREAL_URL'] = 'memory://'
-        os.environ['REPO_PATH'] = tmpdir
+        os.environ["SURREAL_URL"] = "memory://"
+        os.environ["REPO_PATH"] = tmpdir
 
         ledger = make_fresh_ledger()
         await ledger.connect()
@@ -503,12 +635,13 @@ async def run_compliance_resolution_loop():
 
         class Ctx:
             pass
+
         ctx = Ctx()
         ctx.repo_path = tmpdir
-        ctx.session_id = 'sim-compliance'
-        ctx.authoritative_ref = 'main'
-        ctx.authoritative_sha = ''
-        ctx.head_sha = ''
+        ctx.session_id = "sim-compliance"
+        ctx.authoritative_ref = "main"
+        ctx.authoritative_sha = ""
+        ctx.head_sha = ""
         ctx.drift_analyzer = None
         ctx._sync_state = {}
         ctx.ledger = ledger
@@ -516,22 +649,28 @@ class Ctx:
 
         # Step 1: ingest
         from handlers.ingest import handle_ingest
-        ingest_result = await handle_ingest(ctx, {
-            "repo": tmpdir,
-            "query": "auth gate decision",
-            "mappings": [{
-                "intent": "All API endpoints must reject unauthenticated requests with HTTP 401",
-                "feature_group": "Auth",
-                "decision_level": "L2",
-                "span": {
-                    "text": "All API endpoints must reject unauthenticated requests with HTTP 401",
-                    "source_type": "slack",
-                    "source_ref": "eng-discussion",
-                    "meeting_date": "2026-04-26",
-                    "speakers": ["Jin"],
-                },
-            }],
-        })
+
+        ingest_result = await handle_ingest(
+            ctx,
+            {
+                "repo": tmpdir,
+                "query": "auth gate decision",
+                "mappings": [
+                    {
+                        "intent": "All API endpoints must reject unauthenticated requests with HTTP 401",
+                        "feature_group": "Auth",
+                        "decision_level": "L2",
+                        "span": {
+                            "text": "All API endpoints must reject unauthenticated requests with HTTP 401",
+                            "source_type": "slack",
+                            "source_ref": "eng-discussion",
+                            "meeting_date": "2026-04-26",
+                            "speakers": ["Jin"],
+                        },
+                    }
+                ],
+            },
+        )
         decision_id = ingest_result.created_decisions[0].decision_id
 
         # Step 2: ratify the decision — proposed decisions are drift-exempt and
@@ -539,23 +678,33 @@ class Ctx:
         # In real sessions the user reviews proposed decisions and calls ratify;
         # in this simulation we ratify immediately for verification purposes.
         from handlers.ratify import handle_ratify
+
         await handle_ratify(ctx, decision_id=decision_id, signer="sim-run8", action="ratify")
 
         # Step 3: bind
         from handlers.bind import handle_bind
-        bind_result = await handle_bind(ctx, bindings=[{
-            "decision_id": decision_id,
-            "file_path": "auth.py",
-            "symbol_name": "require_auth",
-            "start_line": 1,
-            "end_line": 4,
-            "purpose": "Auth gate — reject unauthenticated requests with 401",
-        }])
+
+        bind_result = await handle_bind(
+            ctx,
+            bindings=[
+                {
+                    "decision_id": decision_id,
+                    "file_path": "auth.py",
+                    "symbol_name": "require_auth",
+                    "start_line": 1,
+                    "end_line": 4,
+                    "purpose": "Auth gate — reject unauthenticated requests with 401",
+                }
+            ],
+        )
         bind_ok = bind_result.bindings and not bind_result.bindings[0].error
         region_id = bind_result.bindings[0].region_id if bind_ok else None
 
         if not bind_ok:
-            section("Run 8 — pending_compliance_checks → resolve_compliance → reflected", "FAIL — bind failed")
+            section(
+                "Run 8 — pending_compliance_checks → resolve_compliance → reflected",
+                "FAIL — bind failed",
+            )
             return
 
         # Step 3: advance HEAD so the sync cache is stale and link_commit sweeps fresh.
@@ -563,32 +712,40 @@ class Ctx:
         # last_synced_commit, so without a new commit the detect_drift call
         # would hit the stale pre-bind cache and find 0 regions.
         test_file.write_text(
-            'def require_auth(request):\n'
+            "def require_auth(request):\n"
             '    """Reject unauthenticated requests with 401."""\n'
             '    if not request.get("token"):\n'
             '        raise PermissionError("401 Unauthorized")\n'
-            '# v2: docstring clarified\n'
+            "# v2: docstring clarified\n"
+        )
+        subprocess.run(["git", "add", "auth.py"], cwd=tmpdir, check=True, capture_output=True)
+        subprocess.run(
+            ["git", "commit", "-m", "docs: clarify require_auth docstring"],
+            cwd=tmpdir,
+            check=True,
+            capture_output=True,
         )
-        subprocess.run(['git', 'add', 'auth.py'], cwd=tmpdir, check=True, capture_output=True)
-        subprocess.run(['git', 'commit', '-m', 'docs: clarify require_auth docstring'], cwd=tmpdir, check=True, capture_output=True)
 
         # Step 4: detect_drift — triggers a fresh link_commit that sweeps auth.py,
         # finds the grounded region, and generates pending_compliance_checks.
         from handlers.detect_drift import handle_detect_drift
+
         drift_result = await handle_detect_drift(ctx, file_path="auth.py")
-        sync_status = getattr(drift_result, 'sync_status', None)
-        pending_checks = getattr(sync_status, 'pending_compliance_checks', []) or []
-        flow_id = getattr(sync_status, 'flow_id', '') or ''
+        sync_status = getattr(drift_result, "sync_status", None)
+        pending_checks = getattr(sync_status, "pending_compliance_checks", []) or []
+        flow_id = getattr(sync_status, "flow_id", "") or ""
 
         status_before = "unknown"
         if pending_checks:
             # Read the actual decision status before resolving
             from ledger.queries import project_decision_status
-            inner = getattr(ledger, '_inner', ledger)
+
+            inner = getattr(ledger, "_inner", ledger)
             status_before = await project_decision_status(inner._client, decision_id)
 
         # Step 5: call resolve_compliance for each pending check
         from handlers.resolve_compliance import handle_resolve_compliance
+
         verdicts_written = 0
         if pending_checks:
             verdicts = [
@@ -612,10 +769,11 @@ class Ctx:
 
         # Step 6: verify status is now 'reflected'
         from ledger.queries import project_decision_status
-        inner = getattr(ledger, '_inner', ledger)
+
+        inner = getattr(ledger, "_inner", ledger)
         status_after = await project_decision_status(inner._client, decision_id)
 
-        passed = (status_after == "reflected")
+        passed = status_after == "reflected"
 
         if pending_checks:
             body = (
@@ -642,14 +800,17 @@ class Ctx:
 
     finally:
         shutil.rmtree(tmpdir, ignore_errors=True)
-        os.environ['SURREAL_URL'] = 'memory://'
-        os.environ['REPO_PATH'] = REPO
+        os.environ["SURREAL_URL"] = "memory://"
+        os.environ["REPO_PATH"] = REPO
 
-    section("Run 8 — pending_compliance_checks → resolve_compliance → reflected (skill gap fix)", body)
+    section(
+        "Run 8 — pending_compliance_checks → resolve_compliance → reflected (skill gap fix)", body
+    )
 
 
 # ── Run 9: signoff/status decoupling verification ───────────────────────────
 
+
 async def run_signoff_status_decoupling():
     """
     Verify the v0.9+ orthogonalization of status (code-compliance) and signoff (human-approval):
@@ -660,30 +821,40 @@ async def run_signoff_status_decoupling():
     C. resolve_collision supersede merges signoff dict — ratification record preserved
     D. History shows superseded decisions with last code-compliance status + signoff_state
     """
-    import subprocess, datetime as dt
-    tmpdir = tempfile.mkdtemp(prefix='bicam_signoff_test_')
+    import datetime as dt
+    import subprocess
+
+    tmpdir = tempfile.mkdtemp(prefix="bicam_signoff_test_")
     try:
-        subprocess.run(['git', 'init', '-b', 'main'], cwd=tmpdir, check=True, capture_output=True)
-        subprocess.run(['git', 'config', 'user.email', 'test@test.com'], cwd=tmpdir, check=True, capture_output=True)
-        subprocess.run(['git', 'config', 'user.name', 'Test'], cwd=tmpdir, check=True, capture_output=True)
-        (pathlib.Path(tmpdir) / 'app.py').write_text('def main(): pass\n')
-        subprocess.run(['git', 'add', 'app.py'], cwd=tmpdir, check=True, capture_output=True)
-        subprocess.run(['git', 'commit', '-m', 'init'], cwd=tmpdir, check=True, capture_output=True)
-
-        os.environ['SURREAL_URL'] = 'memory://'
-        os.environ['REPO_PATH'] = tmpdir
+        subprocess.run(["git", "init", "-b", "main"], cwd=tmpdir, check=True, capture_output=True)
+        subprocess.run(
+            ["git", "config", "user.email", "test@test.com"],
+            cwd=tmpdir,
+            check=True,
+            capture_output=True,
+        )
+        subprocess.run(
+            ["git", "config", "user.name", "Test"], cwd=tmpdir, check=True, capture_output=True
+        )
+        (pathlib.Path(tmpdir) / "app.py").write_text("def main(): pass\n")
+        subprocess.run(["git", "add", "app.py"], cwd=tmpdir, check=True, capture_output=True)
+        subprocess.run(["git", "commit", "-m", "init"], cwd=tmpdir, check=True, capture_output=True)
+
+        os.environ["SURREAL_URL"] = "memory://"
+        os.environ["REPO_PATH"] = tmpdir
         ledger = make_fresh_ledger()
         await ledger.connect()
         from adapters.code_locator import get_code_locator
 
         class Ctx:
             pass
+
         ctx = Ctx()
         ctx.repo_path = tmpdir
-        ctx.session_id = 'sim-signoff'
-        ctx.authoritative_ref = 'main'
-        ctx.authoritative_sha = ''
-        ctx.head_sha = ''
+        ctx.session_id = "sim-signoff"
+        ctx.authoritative_ref = "main"
+        ctx.authoritative_sha = ""
+        ctx.head_sha = ""
         ctx.drift_analyzer = None
         ctx._sync_state = {}
         ctx.ledger = ledger
@@ -698,36 +869,39 @@ class Ctx:
         from handlers.ingest import handle_ingest
         from ledger.queries import project_decision_status
 
-        ingest_r = await handle_ingest(ctx, {
-            "repo": tmpdir,
-            "query": "signoff decoupling test",
-            "mappings": [{
-                "intent": "Feature flags must be documented before enabling in prod",
-                "feature_group": "Release",
-                "decision_level": "L2",
-                "span": {
-                    "text": "Feature flags must be documented before enabling in prod",
-                    "source_type": "slack",
-                    "source_ref": "eng-channel",
-                    "meeting_date": "2026-04-26",
-                    "speakers": ["Jin"],
-                },
-                # NOTE: no 'signoff' key — server stamps signoff.state='proposed'
-            }],
-        })
+        ingest_r = await handle_ingest(
+            ctx,
+            {
+                "repo": tmpdir,
+                "query": "signoff decoupling test",
+                "mappings": [
+                    {
+                        "intent": "Feature flags must be documented before enabling in prod",
+                        "feature_group": "Release",
+                        "decision_level": "L2",
+                        "span": {
+                            "text": "Feature flags must be documented before enabling in prod",
+                            "source_type": "slack",
+                            "source_ref": "eng-channel",
+                            "meeting_date": "2026-04-26",
+                            "speakers": ["Jin"],
+                        },
+                        # NOTE: no 'signoff' key — server stamps signoff.state='proposed'
+                    }
+                ],
+            },
+        )
         did = ingest_r.created_decisions[0].decision_id
 
-        inner = getattr(ledger, '_inner', ledger)
+        inner = getattr(ledger, "_inner", ledger)
         code_status = await project_decision_status(inner._client, did)
 
-        raw_rows = await inner._client.query(
-            f"SELECT signoff FROM {did} LIMIT 1"
-        )
-        raw_signoff = (raw_rows[0].get('signoff') or {}) if raw_rows else {}
-        signoff_state = raw_signoff.get('state', '?')
-        discovered = raw_signoff.get('discovered', '?')
+        raw_rows = await inner._client.query(f"SELECT signoff FROM {did} LIMIT 1")
+        raw_signoff = (raw_rows[0].get("signoff") or {}) if raw_rows else {}
+        signoff_state = raw_signoff.get("state", "?")
+        discovered = raw_signoff.get("discovered", "?")
 
-        a_pass = (code_status == 'ungrounded' and signoff_state == 'proposed')
+        a_pass = code_status == "ungrounded" and signoff_state == "proposed"
         results_a = [
             f"  decision_id:    {did}",
             f"  status:         {code_status}  (expected: ungrounded)",
@@ -738,9 +912,7 @@ class Ctx:
 
         # ── B: session-start banner detects stale proposal via signoff ────────
         # Backdate the signoff to simulate 15-day-old proposal
-        stale_created = (
-            dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=15)
-        ).isoformat()
+        stale_created = (dt.datetime.now(dt.UTC) - dt.timedelta(days=15)).isoformat()
         await inner._client.execute(
             f"UPDATE {did} SET signoff = $s",
             {"s": {**raw_signoff, "created_at": stale_created}},
@@ -748,6 +920,7 @@ class Ctx:
 
         # Mock the ledger's get_decisions_by_status to return our stale-proposal row
         from unittest.mock import AsyncMock, patch
+
         stale_row = {
             "decision_id": did,
             "description": "Feature flags must be documented before enabling in prod",
@@ -759,6 +932,7 @@ class Ctx:
 
         class BannerCtx:
             pass
+
         bctx = BannerCtx()
         bctx._sync_state = {}
         mock_ledger = AsyncMock()
@@ -766,14 +940,15 @@ class BannerCtx:
         bctx.ledger = mock_ledger
 
         from handlers.sync_middleware import get_session_start_banner
+
         banner = await get_session_start_banner(bctx)
 
         b_pass = (
             banner is not None
             and banner.stale_proposal_count == 1
             and banner.proposal_count == 1
-            and any(i.get('signoff_state') == 'proposed' for i in banner.items)
-            and 'stale proposal' in banner.message
+            and any(i.get("signoff_state") == "proposed" for i in banner.items)
+            and "stale proposal" in banner.message
         )
         results_b = [
             f"  banner fired:           {banner is not None}",
@@ -788,37 +963,46 @@ class BannerCtx:
         # ── C: resolve_collision supersede merges signoff ─────────────────────
         # Ratify the old decision first
         from handlers.ratify import handle_ratify
+
         rat = await handle_ratify(ctx, decision_id=did, signer="sim-run9")
         old_signoff_after_ratify = rat.signoff
 
         # Ingest a new superseding decision
-        ingest_new = await handle_ingest(ctx, {
-            "repo": tmpdir,
-            "query": "supersede test",
-            "mappings": [{
-                "intent": "Feature flags must be documented AND reviewed by two engineers before prod",
-                "feature_group": "Release",
-                "decision_level": "L2",
-                "span": {
-                    "text": "Feature flags must be documented AND reviewed by two engineers",
-                    "source_type": "slack",
-                    "source_ref": "eng-channel-v2",
-                    "meeting_date": "2026-04-26",
-                    "speakers": ["Jin"],
-                },
-            }],
-        })
+        ingest_new = await handle_ingest(
+            ctx,
+            {
+                "repo": tmpdir,
+                "query": "supersede test",
+                "mappings": [
+                    {
+                        "intent": "Feature flags must be documented AND reviewed by two engineers before prod",
+                        "feature_group": "Release",
+                        "decision_level": "L2",
+                        "span": {
+                            "text": "Feature flags must be documented AND reviewed by two engineers",
+                            "source_type": "slack",
+                            "source_ref": "eng-channel-v2",
+                            "meeting_date": "2026-04-26",
+                            "speakers": ["Jin"],
+                        },
+                    }
+                ],
+            },
+        )
         new_did = ingest_new.created_decisions[0].decision_id
 
         from handlers.resolve_collision import handle_resolve_collision
+
         await handle_resolve_collision(ctx, new_id=new_did, old_id=did, action="supersede")
 
         # Read the old decision's signoff after supersession
         post_rows = await inner._client.query(f"SELECT signoff FROM {did} LIMIT 1")
-        post_signoff = (post_rows[0].get('signoff') or {}) if post_rows else {}
+        post_signoff = (post_rows[0].get("signoff") or {}) if post_rows else {}
 
-        c_ratified_preserved = post_signoff.get('ratified_at') == old_signoff_after_ratify.get('ratified_at')
-        c_state_superseded = post_signoff.get('state') == 'superseded'
+        c_ratified_preserved = post_signoff.get("ratified_at") == old_signoff_after_ratify.get(
+            "ratified_at"
+        )
+        c_state_superseded = post_signoff.get("state") == "superseded"
         c_pass = c_state_superseded and c_ratified_preserved
 
         results_c = [
@@ -831,15 +1015,15 @@ class BannerCtx:
 
         # ── D: history shows superseded decisions with code-compliance status ─
         from handlers.history import handle_history
+
         hist = await handle_history(ctx)
         superseded_decisions = [
-            d for fg in hist.features for d in fg.decisions
-            if d.signoff_state == 'superseded'
+            d for fg in hist.features for d in fg.decisions if d.signoff_state == "superseded"
         ]
         d_pass = (
             len(superseded_decisions) == 1
-            and superseded_decisions[0].status in ('ungrounded', 'pending', 'drifted', 'reflected')
-            and superseded_decisions[0].signoff_state == 'superseded'
+            and superseded_decisions[0].status in ("ungrounded", "pending", "drifted", "reflected")
+            and superseded_decisions[0].signoff_state == "superseded"
         )
         results_d_dec = superseded_decisions[0] if superseded_decisions else None
         results_d = [
@@ -851,20 +1035,24 @@ class BannerCtx:
 
     finally:
         shutil.rmtree(tmpdir, ignore_errors=True)
-        os.environ['SURREAL_URL'] = 'memory://'
-        os.environ['REPO_PATH'] = REPO
+        os.environ["SURREAL_URL"] = "memory://"
+        os.environ["REPO_PATH"] = REPO
 
     all_pass = a_pass and b_pass and c_pass and d_pass
     body = (
         "Testing v0.9+ status/signoff orthogonalization:\n\n"
         "A — Ingest without signoff → status='ungrounded', signoff.state='proposed'\n"
-        + '\n'.join(results_a) + '\n\n'
+        + "\n".join(results_a)
+        + "\n\n"
         "B — Session-start banner detects stale proposals via signoff.state (not status)\n"
-        + '\n'.join(results_b) + '\n\n'
+        + "\n".join(results_b)
+        + "\n\n"
         "C — resolve_collision supersede merges signoff (preserves ratification record)\n"
-        + '\n'.join(results_c) + '\n\n'
+        + "\n".join(results_c)
+        + "\n\n"
         "D — History surfaces superseded decisions with last code-compliance status\n"
-        + '\n'.join(results_d) + '\n\n'
+        + "\n".join(results_d)
+        + "\n\n"
         f"Overall: {'PASS — all four orthogonalization invariants hold' if all_pass else 'PARTIAL PASS — see sub-results'}\n"
     )
     section("Run 9 — signoff/status decoupling verification (v0.9+)", body)
@@ -872,10 +1060,11 @@ class BannerCtx:
 
 # ── main ─────────────────────────────────────────────────────────────────────
 
+
 async def main():
     print("=== Bicameral MCP v0.9.3 extended simulation ===\n")
 
-    ctx = await make_ctx(repo_path=REPO, surreal_url='memory://')
+    ctx = await make_ctx(repo_path=REPO, surreal_url="memory://")
     ingest_result = await run_ingest(ctx)
     await run_preflight_quick(ctx)
     await run_history_verify(ctx)
diff --git a/scripts/sim_issue_108_flows.py b/scripts/sim_issue_108_flows.py
index a524b05c..6590068a 100644
--- a/scripts/sim_issue_108_flows.py
+++ b/scripts/sim_issue_108_flows.py
@@ -75,9 +75,7 @@ class Ctx:
 
 def init_temp_git(prefix: str) -> str:
     tmpdir = tempfile.mkdtemp(prefix=prefix)
-    subprocess.run(
-        ["git", "init", "-b", "main"], cwd=tmpdir, check=True, capture_output=True
-    )
+    subprocess.run(["git", "init", "-b", "main"], cwd=tmpdir, check=True, capture_output=True)
     subprocess.run(
         ["git", "config", "user.email", "sim@sim.com"],
         cwd=tmpdir,
@@ -98,9 +96,7 @@ def commit_file(repo: str, relpath: str, content: str, message: str) -> None:
     p.parent.mkdir(parents=True, exist_ok=True)
     p.write_text(content)
     subprocess.run(["git", "add", relpath], cwd=repo, check=True, capture_output=True)
-    subprocess.run(
-        ["git", "commit", "-m", message], cwd=repo, check=True, capture_output=True
-    )
+    subprocess.run(["git", "commit", "-m", message], cwd=repo, check=True, capture_output=True)
 
 
 # ── Flow 1: Record decisions from a meeting ────────────────────────────
@@ -156,9 +152,7 @@ async def flow_1_record_decisions() -> None:
 
         # Read raw signoff to verify state
         inner = getattr(ctx.ledger, "_inner", ctx.ledger)
-        raw_rows = await inner._client.query(
-            f"SELECT signoff FROM {decision_id} LIMIT 1"
-        )
+        raw_rows = await inner._client.query(f"SELECT signoff FROM {decision_id} LIMIT 1")
         raw_signoff = (raw_rows[0].get("signoff") or {}) if raw_rows else {}
         signoff_state_post_ingest = raw_signoff.get("state", "?")
         status_post_ingest = await project_decision_status(inner._client, decision_id)
@@ -174,8 +168,7 @@ async def flow_1_record_decisions() -> None:
             and signoff_state_post_ingest == "proposed"
             and status_post_ingest == "ungrounded"
             and signoff_state_post_ratify == "ratified"
-            and status_post_ratify
-            == "ungrounded"  # still ungrounded — bind not yet called
+            and status_post_ratify == "ungrounded"  # still ungrounded — bind not yet called
         )
 
         body = (
@@ -396,9 +389,7 @@ async def flow_3_commit_to_reflected() -> None:
         # Out-of-session-committer invariant: status === 'pending' is the state that
         # drives the dashboard tooltip. Tooltip text in dashboard.html:
         #   "Pending compliance — run /bicameral-sync in your Claude Code session to resolve."
-        out_of_session_state_correct = (
-            status_pending == "pending" and len(pending_checks) >= 1
-        )
+        out_of_session_state_correct = status_pending == "pending" and len(pending_checks) >= 1
 
         # Caller-LLM resolves the queue (this is what /bicameral-sync does)
         verdicts = [
@@ -413,17 +404,11 @@ async def flow_3_commit_to_reflected() -> None:
             for c in pending_checks
         ]
         if verdicts:
-            await handle_resolve_compliance(
-                ctx, phase="drift", verdicts=verdicts, flow_id=flow_id
-            )
+            await handle_resolve_compliance(ctx, phase="drift", verdicts=verdicts, flow_id=flow_id)
 
         status_after = await project_decision_status(inner._client, decision_id)
 
-        passed = (
-            out_of_session_state_correct
-            and bool(flow_id)
-            and status_after == "reflected"
-        )
+        passed = out_of_session_state_correct and bool(flow_id) and status_after == "reflected"
 
         body = (
             f"Pre-resolve (out-of-session committer state):\n"
@@ -464,9 +449,7 @@ async def flow_3a_ephemeral_branch() -> None:
         check=True,
         capture_output=True,
     )
-    commit_file(
-        tmpdir, "feat.py", "def feature():\n    return 'branch'\n", "feat: branch impl"
-    )
+    commit_file(tmpdir, "feat.py", "def feature():\n    return 'branch'\n", "feat: branch impl")
 
     try:
         ctx = await make_temp_ctx(tmpdir, "sim-flow3a")
@@ -546,18 +529,14 @@ async def flow_3a_ephemeral_branch() -> None:
                 }
                 for c in pending_checks
             ]
-            await handle_resolve_compliance(
-                ctx, phase="drift", verdicts=verdicts, flow_id=flow_id
-            )
+            await handle_resolve_compliance(ctx, phase="drift", verdicts=verdicts, flow_id=flow_id)
 
         inner = getattr(ctx.ledger, "_inner", ctx.ledger)
         status_on_branch = await project_decision_status(inner._client, did)
 
         # Switch back to main — ensure_ledger_synced should fire on next tool call
         # and the stale repair should mark the decision drifted (since H_main != H_branch).
-        subprocess.run(
-            ["git", "checkout", "main"], cwd=tmpdir, check=True, capture_output=True
-        )
+        subprocess.run(["git", "checkout", "main"], cwd=tmpdir, check=True, capture_output=True)
         # Force fresh sync by invalidating any caches
         try:
             from handlers.link_commit import invalidate_sync_cache
@@ -650,12 +629,8 @@ async def flow_4_session_end_capture() -> None:
         decision_id = ingest_r.created_decisions[0].decision_id
 
         inner = getattr(ctx.ledger, "_inner", ctx.ledger)
-        raw_rows = await inner._client.query(
-            f"SELECT signoff FROM {decision_id} LIMIT 1"
-        )
-        signoff_state = (
-            (raw_rows[0].get("signoff") or {}).get("state", "?") if raw_rows else "?"
-        )
+        raw_rows = await inner._client.query(f"SELECT signoff FROM {decision_id} LIMIT 1")
+        signoff_state = (raw_rows[0].get("signoff") or {}).get("state", "?") if raw_rows else "?"
         status = await project_decision_status(inner._client, decision_id)
 
         # Verify source_type round-trips (history readback is the user-facing surface)
@@ -667,9 +642,7 @@ async def flow_4_session_end_capture() -> None:
         target = next((d for d in all_decisions if d.id == decision_id), None)
         sources = target.sources if target else []
         # HistorySource is a Pydantic model — attribute access, not .get()
-        source_types = (
-            [getattr(s, "source_type", "?") for s in sources] if sources else []
-        )
+        source_types = [getattr(s, "source_type", "?") for s in sources] if sources else []
         source_type_round_trip = source_types[0] if source_types else "?"
 
         passed = (
@@ -771,9 +744,7 @@ async def flow_5_history_axes() -> None:
         }
 
         all_have_status = all(d.status in valid_status for d in all_decisions)
-        all_have_signoff = all(
-            (d.signoff_state in valid_signoff) for d in all_decisions
-        )
+        all_have_signoff = all((d.signoff_state in valid_signoff) for d in all_decisions)
         feature_count = len(hist.features)
 
         # Verify the orthogonalization: the ratified decision should show
@@ -786,17 +757,16 @@ async def flow_5_history_axes() -> None:
         )
 
         passed = (
-            feature_count >= 2
-            and all_have_status
-            and all_have_signoff
-            and ratified_axes_correct
+            feature_count >= 2 and all_have_status and all_have_signoff and ratified_axes_correct
         )
 
         body = f"Feature groups: {feature_count}\n\n"
         for fg in hist.features:
             body += f"  [{fg.name}] — {len(fg.decisions)} decision(s)\n"
             for d in fg.decisions:
-                body += f"    status={d.status}  signoff_state={d.signoff_state}  '{d.summary[:50]}'\n"
+                body += (
+                    f"    status={d.status}  signoff_state={d.signoff_state}  '{d.summary[:50]}'\n"
+                )
 
         body += (
             f"\nSpec invariant — orthogonal axes:\n"
diff --git a/server.py b/server.py
index 340ebbe7..a0ceb507 100644
--- a/server.py
+++ b/server.py
@@ -37,19 +37,19 @@
 from mcp.types import TextContent, Tool
 
 from context import BicameralContext
-from ledger.schema import DestructiveMigrationRequired, SchemaVersionTooNew
+from dashboard.server import get_dashboard_server
 from handlers.bind import handle_bind
 from handlers.gap_judge import handle_judge_gaps
+from handlers.history import handle_history
 from handlers.ingest import handle_ingest
 from handlers.link_commit import handle_link_commit
 from handlers.preflight import handle_preflight
-from handlers.reset import handle_reset
 from handlers.ratify import handle_ratify
+from handlers.reset import handle_reset
 from handlers.resolve_collision import handle_resolve_collision
 from handlers.resolve_compliance import handle_resolve_compliance
-from handlers.history import handle_history
 from handlers.update import get_update_notice, handle_update
-from dashboard.server import get_dashboard_server
+from ledger.schema import DestructiveMigrationRequired, SchemaVersionTooNew
 
 SERVER_NAME = "bicameral-mcp"
 
@@ -72,14 +72,13 @@ def _resolve_server_version() -> str:
     for candidate in (here, here.parent):
         toml = candidate / "pyproject.toml"
         if toml.exists():
-            m = re.search(
-                r'^version\s*=\s*"([^"]+)"', toml.read_text(), re.MULTILINE
-            )
+            m = re.search(r'^version\s*=\s*"([^"]+)"', toml.read_text(), re.MULTILINE)
             if m:
                 return m.group(1)
 
     try:
         from importlib.metadata import version as _pkg_version
+
         return _pkg_version("bicameral-mcp")
     except Exception:
         return "0.1.0"
@@ -194,12 +193,30 @@ async def list_tools() -> list[Tool]:
                         "items": {
                             "type": "object",
                             "properties": {
-                                "decision_id": {"type": "string", "description": "Decision ID from the ledger (e.g. from pending_grounding_decisions)"},
-                                "file_path": {"type": "string", "description": "Repo-relative path to the file"},
-                                "symbol_name": {"type": "string", "description": "Function/class/method name"},
-                                "start_line": {"type": "integer", "description": "1-indexed start line (optional — omit to auto-resolve automatically)"},
-                                "end_line": {"type": "integer", "description": "1-indexed end line (optional)"},
-                                "purpose": {"type": "string", "description": "Optional one-line description for display"},
+                                "decision_id": {
+                                    "type": "string",
+                                    "description": "Decision ID from the ledger (e.g. from pending_grounding_decisions)",
+                                },
+                                "file_path": {
+                                    "type": "string",
+                                    "description": "Repo-relative path to the file",
+                                },
+                                "symbol_name": {
+                                    "type": "string",
+                                    "description": "Function/class/method name",
+                                },
+                                "start_line": {
+                                    "type": "integer",
+                                    "description": "1-indexed start line (optional — omit to auto-resolve automatically)",
+                                },
+                                "end_line": {
+                                    "type": "integer",
+                                    "description": "1-indexed end line (optional)",
+                                },
+                                "purpose": {
+                                    "type": "string",
+                                    "description": "Optional one-line description for display",
+                                },
                             },
                             "required": ["decision_id", "file_path", "symbol_name"],
                         },
@@ -794,16 +811,25 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
             "t0": time.monotonic(),
             "rationale": arguments.get("rationale", ""),
         }
-        return [TextContent(type="text", text=json.dumps({
-            "session_id": session_id,
-            "skill": arguments["skill_name"],
-            "status": "started",
-        }))]
+        return [
+            TextContent(
+                type="text",
+                text=json.dumps(
+                    {
+                        "session_id": session_id,
+                        "skill": arguments["skill_name"],
+                        "status": "started",
+                    }
+                ),
+            )
+        ]
 
     if name == "bicameral.skill_end":
         from pydantic import ValidationError
-        from telemetry import record_skill_event
+
         from contracts import SKILL_DIAGNOSTIC_MODELS
+        from telemetry import record_skill_event
+
         session_id = arguments["session_id"]
         skill_name = arguments["skill_name"]
         errored = arguments.get("errored", False)
@@ -825,8 +851,7 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
                 diagnostic = validated.model_dump()
             except ValidationError as exc:
                 unknown_fields = [
-                    e["loc"][0] for e in exc.errors()
-                    if e["type"] == "extra_forbidden" and e["loc"]
+                    e["loc"][0] for e in exc.errors() if e["type"] == "extra_forbidden" and e["loc"]
                 ]
                 # Strip unknowns and validate the remaining known fields.
                 known_raw = {k: v for k, v in raw_diagnostic.items() if k not in unknown_fields}
@@ -839,8 +864,14 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
             diagnostic = raw_diagnostic or None
 
         record_skill_event(
-            skill_name, session_id, duration_ms, errored, SERVER_VERSION,
-            diagnostic=diagnostic, error_class=error_class, rationale=rationale,
+            skill_name,
+            session_id,
+            duration_ms,
+            errored,
+            SERVER_VERSION,
+            diagnostic=diagnostic,
+            error_class=error_class,
+            rationale=rationale,
         )
         response: dict = {
             "session_id": session_id,
@@ -857,6 +888,7 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
 
     if name == "bicameral.feedback":
         from telemetry import send_event
+
         send_event(
             SERVER_VERSION,
             event_type="agent_feedback",
@@ -869,6 +901,7 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
 
     if name == "bicameral.usage_summary":
         from handlers.usage_summary import handle_usage_summary
+
         data = await handle_usage_summary(ctx, days=int(arguments.get("days", 7)))
         return [TextContent(type="text", text=json.dumps(data, indent=2))]
 
@@ -878,11 +911,11 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
     _sync_result = None
     if name not in ("bicameral.link_commit", "link_commit", "bicameral.update", "update"):
         from handlers.sync_middleware import ensure_ledger_synced
+
         _sync_result = await ensure_ledger_synced(ctx)
 
     try:
         if name in ("bicameral.link_commit", "link_commit"):
-
             result = await handle_link_commit(
                 ctx,
                 commit_hash=arguments.get("commit_hash", "HEAD"),
@@ -924,10 +957,12 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
             # Honest empty path — handler returns None when no matches.
             # Emit an empty envelope the agent can detect and skip on.
             if result is None:
-                return [TextContent(
-                    type="text",
-                    text=json.dumps({"judgment_payload": None, "topic": arguments["topic"]}),
-                )]
+                return [
+                    TextContent(
+                        type="text",
+                        text=json.dumps({"judgment_payload": None, "topic": arguments["topic"]}),
+                    )
+                ]
         elif name in ("bicameral.resolve_compliance", "resolve_compliance"):
             result = await handle_resolve_compliance(
                 ctx,
@@ -983,6 +1018,7 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
                 return [TextContent(type="text", text=json.dumps(payload, indent=2))]
         elif name in ("bicameral.dashboard", "dashboard"):
             from contracts import DashboardResponse
+
             srv = get_dashboard_server()
             if not srv.running:
                 await srv.start(ctx_factory=BicameralContext.from_env)
@@ -1059,10 +1095,12 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
             if isinstance(exc, DestructiveMigrationRequired)
             else "upgrade your binary: pipx upgrade bicameral-mcp"
         )
-        return [TextContent(
-            type="text",
-            text=json.dumps({"error": str(exc), "action": action}, indent=2),
-        )]
+        return [
+            TextContent(
+                type="text",
+                text=json.dumps({"error": str(exc), "action": action}, indent=2),
+            )
+        ]
 
 
 async def run_smoke_test() -> dict[str, object]:
@@ -1110,6 +1148,7 @@ async def serve_stdio() -> None:
     # below once the session is live.
     try:
         from consent import notify_if_first_run
+
         notify_if_first_run()
     except Exception:
         pass
@@ -1202,18 +1241,22 @@ def _dispatch(args: Any) -> int:
     """Route parsed args to the appropriate handler. Returns exit code."""
     if args.command == "config":
         from setup_wizard import run_config_wizard
+
         return run_config_wizard()
     if args.command == "reset":
         from setup_wizard import run_reset_wizard
+
         return run_reset_wizard()
     if args.command == "setup":
         from setup_wizard import run_setup
+
         return run_setup(args.repo_path, args.history_path)
     # triage-adapt: link_commit dispatch — added per #124 backport without
     # the broader _register_subparsers/_dispatch refactor or the branch-scan
     # / --with-push-hook prerequisites
     if args.command == "link_commit":
         from cli.link_commit_cli import main as link_commit_main
+
         return link_commit_main(args.commit_hash, quiet=args.quiet)
     if args.smoke_test:
         result = asyncio.run(run_smoke_test())
diff --git a/setup_wizard.py b/setup_wizard.py
index d687efc9..fd1667e4 100644
--- a/setup_wizard.py
+++ b/setup_wizard.py
@@ -55,7 +55,7 @@ def _detect_history_path(repo_path: Path, hint: str | None = None) -> Path:
         return repo_path
 
     raw = input(
-        f"\n  History storage path (default: same as repo — press Enter to skip):\n  > "
+        "\n  History storage path (default: same as repo — press Enter to skip):\n  > "
     ).strip()
     if not raw:
         return repo_path
@@ -127,6 +127,7 @@ def _detect_agents() -> list[str]:
 def _is_interactive() -> bool:
     """Check if stdin is a terminal (not piped)."""
     import sys
+
     return sys.stdin.isatty()
 
 
@@ -306,11 +307,17 @@ def _install_for_agent(
         config_json = json.dumps(config)
         subprocess.run(
             ["claude", "mcp", "remove", "bicameral", "--scope", "project"],
-            capture_output=True, text=True, timeout=10, cwd=str(repo_path),
+            capture_output=True,
+            text=True,
+            timeout=10,
+            cwd=str(repo_path),
         )
         result = subprocess.run(
             ["claude", "mcp", "add-json", "bicameral", "--scope", "project", config_json],
-            capture_output=True, text=True, timeout=10, cwd=str(repo_path),
+            capture_output=True,
+            text=True,
+            timeout=10,
+            cwd=str(repo_path),
         )
         if result.returncode == 0:
             print(f"  {agent['name']}: installed via CLI")
@@ -323,8 +330,15 @@ def _install_for_agent(
         for k, v in config["env"].items():
             env_args.extend(["--env", f"{k}={v}"])
         result = subprocess.run(
-            ["codex", "mcp", "add", "bicameral"] + env_args + ["--"] + [config["command"]] + config["args"],
-            capture_output=True, text=True, timeout=10, cwd=str(repo_path),
+            ["codex", "mcp", "add", "bicameral"]
+            + env_args
+            + ["--"]
+            + [config["command"]]
+            + config["args"],
+            capture_output=True,
+            text=True,
+            timeout=10,
+            cwd=str(repo_path),
         )
         if result.returncode == 0:
             print(f"  {agent['name']}: installed via CLI")
@@ -332,44 +346,105 @@ def _install_for_agent(
 
     # Fallback: write config file directly
     if agent.get("config_format") == "toml":
-        _write_toml_config(repo_path, config_path, data_path=data_path, mode=mode, telemetry=telemetry)
+        _write_toml_config(
+            repo_path, config_path, data_path=data_path, mode=mode, telemetry=telemetry
+        )
     else:
-        _write_json_config(repo_path, config_path, data_path=data_path, mode=mode, telemetry=telemetry)
+        _write_json_config(
+            repo_path, config_path, data_path=data_path, mode=mode, telemetry=telemetry
+        )
 
     print(f"  {agent['name']}: wrote {config_path}")
     return True
 
 
-_BICAMERAL_SESSION_END_COMMAND = (
-    "[ -d .bicameral ] && claude -p '/bicameral:capture-corrections' || true"
-)
+def _build_session_end_command(mcp_config_path: str | None = None) -> str:
+    """Build the SessionEnd hook shell command, optionally pinning an MCP config.
+
+    The command runs ``claude -p`` only when ``.bicameral`` exists and
+    ``BICAMERAL_SESSION_END_RUNNING`` is unset, sets that variable for the
+    child (presumably so the child's own SessionEnd hook does not recurse),
+    and ends in ``|| true`` so the hook always exits 0.
+
+    Args:
+        mcp_config_path: When truthy, appends ``--mcp-config <shell-quoted
+            path> --strict-mcp-config``. End-user installs omit it (the setup
+            wizard registers ``bicameral`` in the default MCP config); test
+            harnesses on a non-default ledger (``tests/e2e/run_e2e_flows.py``)
+            pass it so decisions do not land in ``~/.bicameral/ledger.db``.
+
+    Returns the command; the no-args form is the canonical one prescribed
+    by ``skills/bicameral-capture-corrections/SKILL.md:207`` byte-exact.
+    """
+    import shlex
+
+    extra_flags = ""
+    if mcp_config_path:
+        extra_flags = f" --mcp-config {shlex.quote(str(mcp_config_path))} --strict-mcp-config"
+    return (
+        '[ -d .bicameral ] && [ -z "$BICAMERAL_SESSION_END_RUNNING" ] && '
+        "BICAMERAL_SESSION_END_RUNNING=1 "
+        f"claude -p '/bicameral:capture-corrections --auto-ingest'{extra_flags} || true"
+    )
+
+
+# Canonical no-args form — what `_install_claude_hooks` writes to a fresh
+# end-user's ``.claude/settings.json``. Re-derived from the helper so the
+# function is the single source of truth.
+_BICAMERAL_SESSION_END_COMMAND = _build_session_end_command()
 
 # Fires after every Bash tool use. When the command is a git write-op
-# (commit / merge / pull / rebase --continue), prints a trigger line that
-# causes the agent to invoke /bicameral:sync — running the full
-# link_commit → compliance check flow so status is authoritative immediately.
-_BICAMERAL_POST_COMMIT_COMMAND = (
-    "python3 -c \""
-    "import json,sys; "
-    "d=json.load(sys.stdin); "
-    "c=d.get('tool_input',{}).get('command',''); "
-    "ops=('git commit','git merge ','git pull','git rebase --continue'); "
-    "[print('bicameral: new commit detected — run /bicameral:sync to resolve compliance and get authoritative reflected/drifted status') "
-    "for _ in [1] if any(op in c for op in ops)]\""
-)
+# (commit / merge / pull / rebase --continue), emits a hookSpecificOutput
+# envelope whose additionalContext nudges the agent to invoke
+# /bicameral:sync — running the full link_commit → compliance check
+# flow so status is authoritative immediately.
+#
+# Was a plain-stdout python -c one-liner. Per Claude Code 2.x hook docs
+# (https://code.claude.com/docs/en/hooks), plain stdout from PostToolUse
+# is dropped to the debug log — only UserPromptSubmit / UserPromptExpansion
+# / SessionStart treat raw stdout as agent-visible context. Symptom: the
+# agent committed but never followed through with link_commit because
+# the reminder never reached the model. Console script writes the proper
+# envelope; source: scripts/hooks/post_commit_sync_reminder.py.
+_BICAMERAL_POST_COMMIT_COMMAND = "bicameral-mcp-post-commit-sync-reminder"
+
+# UserPromptSubmit hook: deterministic regex over a verb list elevates
+# bicameral.preflight above the agent's default tool-selection priority
+# whenever a prompt indicates code-implementation intent. Console script
+# is exposed via pyproject.toml [project.scripts] so it resolves on PATH
+# regardless of cwd. Closes #146 for end-user installs (the dogfood path
+# in the bicameral repo's own .claude/settings.json invokes the source
+# file directly via python3).
+_BICAMERAL_PREFLIGHT_REMINDER_COMMAND = "bicameral-mcp-preflight-reminder"
+
+# PostToolUse hook scoped to the bicameral.preflight tool: when preflight
+# surfaces ≥1 decision, prints a system-reminder templating the
+# correction-capture loop (Step 5.6 of bicameral-preflight) so the agent
+# reliably calls bicameral.ingest(source=agent_session) +
+# bicameral.resolve_collision when the user's prompt contradicts a
+# surfaced decision. Closes #154 for end-user installs (the dogfood path
+# invokes the source file directly via python3).
+_BICAMERAL_COLLISION_CAPTURE_REMINDER_COMMAND = "bicameral-mcp-collision-capture-reminder"
+_BICAMERAL_PREFLIGHT_TOOL_NAME = "mcp__bicameral__bicameral_preflight"
 
 
 def _install_claude_hooks(repo_path: Path) -> bool:
     """Merge bicameral hooks into the project-level .claude/settings.json.
 
-    Installs two hooks:
+    Installs four hooks:
     - PostToolUse/Bash: reminds the agent to call link_commit immediately
       after git write-ops (commit / merge / pull / rebase --continue).
+    - PostToolUse/bicameral_preflight: reminds the agent to capture
+      refinements via ingest(agent_session) + resolve_collision when
+      preflight surfaces decisions that the user's prompt contradicts.
     - SessionEnd: runs bicameral-capture-corrections to catch uningested
       mid-session corrections (only fires when .bicameral/ exists).
+    - UserPromptSubmit: deterministic verb-list classifier injects a
+      <system-reminder> elevating bicameral.preflight above the agent's
+      default tool-selection priority on code-implementation prompts.
 
     Idempotent — safe to call on every setup run. Returns True if any new
-    entry was written, False if both were already present.
+    entry was written, False if all four were already present.
     """
     settings_path = repo_path / ".claude" / "settings.json"
     settings_path.parent.mkdir(parents=True, exist_ok=True)
@@ -386,9 +461,7 @@ def _install_claude_hooks(repo_path: Path) -> bool:
 
     # ── PostToolUse / Bash — git write-op reminder ───────────────────
     post_tool_use: list = hooks.setdefault("PostToolUse", [])
-    bash_entry = next(
-        (e for e in post_tool_use if e.get("matcher") == "Bash"), None
-    )
+    bash_entry = next((e for e in post_tool_use if e.get("matcher") == "Bash"), None)
     if bash_entry is None:
         bash_entry = {"matcher": "Bash", "hooks": []}
         post_tool_use.append(bash_entry)
@@ -400,11 +473,35 @@ def _install_claude_hooks(repo_path: Path) -> bool:
         bash_entry["hooks"] = non_bic + [new_post_hook]
         wrote_anything = True
 
+    # ── PostToolUse / bicameral_preflight — collision capture reminder ─
+    preflight_entry = next(
+        (e for e in post_tool_use if e.get("matcher") == _BICAMERAL_PREFLIGHT_TOOL_NAME),
+        None,
+    )
+    if preflight_entry is None:
+        preflight_entry = {"matcher": _BICAMERAL_PREFLIGHT_TOOL_NAME, "hooks": []}
+        post_tool_use.append(preflight_entry)
+    old_pre_hooks = preflight_entry.get("hooks", [])
+    non_bic_pre = [
+        h
+        for h in old_pre_hooks
+        if "bicameral" not in h.get("command", "")
+        and "post_preflight_capture_reminder" not in h.get("command", "")
+    ]
+    new_pre_hook = {"type": "command", "command": _BICAMERAL_COLLISION_CAPTURE_REMINDER_COMMAND}
+    # Rewrite only when the stored list differs from the desired one. The
+    # current hook's command itself contains "bicameral", so comparing the
+    # filtered list to the original would report a change on every run.
+    if old_pre_hooks != non_bic_pre + [new_pre_hook]:
+        preflight_entry["hooks"] = non_bic_pre + [new_pre_hook]
+        wrote_anything = True
+
     # ── SessionEnd — capture uningested corrections ──────────────────
     session_end: list = hooks.setdefault("SessionEnd", [])
     # Remove any stale bicameral SessionEnd entries, then write current.
     non_bic_se = [
-        e for e in session_end
+        e
+        for e in session_end
         if not any("bicameral" in h.get("command", "") for h in e.get("hooks", []))
     ]
     new_se_entry = {"hooks": [{"type": "command", "command": _BICAMERAL_SESSION_END_COMMAND}]}
@@ -412,6 +509,23 @@ def _install_claude_hooks(repo_path: Path) -> bool:
         hooks["SessionEnd"] = non_bic_se + [new_se_entry]
         wrote_anything = True
 
+    # ── UserPromptSubmit — preflight auto-fire reinforcement ─────────
+    user_prompt_submit: list = hooks.setdefault("UserPromptSubmit", [])
+    non_bic_ups = [
+        e
+        for e in user_prompt_submit
+        if not any(
+            "bicameral" in h.get("command", "") or "preflight_reminder" in h.get("command", "")
+            for h in e.get("hooks", [])
+        )
+    ]
+    new_ups_entry = {
+        "hooks": [{"type": "command", "command": _BICAMERAL_PREFLIGHT_REMINDER_COMMAND}]
+    }
+    if user_prompt_submit != non_bic_ups + [new_ups_entry]:  # no-op when already up to date
+        hooks["UserPromptSubmit"] = non_bic_ups + [new_ups_entry]
+        wrote_anything = True
+
     if wrote_anything:
         settings_path.write_text(json.dumps(existing, indent=2) + "\n")
     return wrote_anything
@@ -495,7 +609,9 @@ def _select_collaboration_mode() -> str:
     result = questionary.select(
         "Collaboration mode:",
         choices=[
-            questionary.Choice("Team  — decisions shared via git (append-only event files)", value="team"),
+            questionary.Choice(
+                "Team  — decisions shared via git (append-only event files)", value="team"
+            ),
             questionary.Choice("Solo  — decisions stored locally", value="solo"),
         ],
         default="team",
@@ -559,7 +675,9 @@ def _select_telemetry() -> bool:
     result = questionary.select(
         "Enable anonymous telemetry?",
         choices=[
-            questionary.Choice("Yes  — share anonymous usage stats to improve Bicameral", value=True),
+            questionary.Choice(
+                "Yes  — share anonymous usage stats to improve Bicameral", value=True
+            ),
             questionary.Choice("No   — keep telemetry off", value=False),
         ],
         default=True,
@@ -690,7 +808,7 @@ def run_setup(repo_hint: str | None = None, history_hint: str | None = None) ->
     # Step 3: Runner check
     command, _ = _detect_runner()
     if command not in ("bicameral-mcp",):
-        print(f"\n  Note: bicameral-mcp binary not found on PATH.")
+        print("\n  Note: bicameral-mcp binary not found on PATH.")
         print(f"  Using '{command} -m bicameral_mcp' as runner.")
         print("  Install for a cleaner setup: pip install bicameral-mcp")
 
@@ -708,7 +826,9 @@ def run_setup(repo_hint: str | None = None, history_hint: str | None = None) ->
     # Step 5: Install MCP config for each agent
     print()
     for agent_key in agents:
-        _install_for_agent(agent_key, repo_path, data_path=data_path, mode=collab_mode, telemetry=telemetry)
+        _install_for_agent(
+            agent_key, repo_path, data_path=data_path, mode=collab_mode, telemetry=telemetry
+        )
 
     # Step 6: Install skills + hooks (Claude Code only)
     if "claude" in agents:
@@ -716,12 +836,16 @@ def run_setup(repo_hint: str | None = None, history_hint: str | None = None) ->
         if num_skills:
             print(f"  Claude Code: installed {num_skills} slash commands")
         if _install_claude_hooks(repo_path):
-            print("  Claude Code: installed hooks → link_commit on commit · capture-corrections on session end")
+            print(
+                "  Claude Code: installed hooks → link_commit on commit · capture-corrections on session end"
+            )
 
     # Step 7: Git post-commit hook (Guided mode only)
     if guided:
         if _install_git_post_commit_hook(repo_path):
-            print("  Git: installed post-commit hook → bicameral-mcp link_commit HEAD after every commit")
+            print(
+                "  Git: installed post-commit hook → bicameral-mcp link_commit HEAD after every commit"
+            )
         else:
             print("  Git: post-commit hook already present — skipped")
 
@@ -759,6 +883,7 @@ def run_config_wizard() -> int:
     """
     import subprocess
     import sys
+
     try:
         import yaml
     except ImportError:
@@ -818,7 +943,9 @@ def run_config_wizard() -> int:
     )
     result = subprocess.run(
         [sys.executable, "-c", script],
-        capture_output=True, text=True, timeout=30,
+        capture_output=True,
+        text=True,
+        timeout=30,
     )
     skills_n = int(result.stdout.strip() or "0") if result.returncode == 0 else 0
 
@@ -842,10 +969,13 @@ def _print_change(label: str, old, new) -> None:
 
 def _select_collaboration_mode_with_default(current: str) -> str:
     import questionary
+
     if not _is_interactive():
         return current
     choices = [
-        questionary.Choice("Team  — decisions shared via git (append-only event files)", value="team"),
+        questionary.Choice(
+            "Team  — decisions shared via git (append-only event files)", value="team"
+        ),
         questionary.Choice("Solo  — decisions stored locally", value="solo"),
     ]
     result = questionary.select(
@@ -858,6 +988,7 @@ def _select_collaboration_mode_with_default(current: str) -> str:
 
 def _select_guided_mode_with_default(current: bool) -> bool:
     import questionary
+
     if not _is_interactive():
         return current
     choices = [
@@ -874,6 +1005,7 @@ def _select_guided_mode_with_default(current: bool) -> bool:
 
 def _select_telemetry_with_default(current: bool) -> bool:
     import questionary
+
     if not _is_interactive():
         return current
     choices = [
@@ -895,6 +1027,7 @@ def run_reset_wizard() -> int:
     then asks for explicit confirmation before wiping.
     """
     import asyncio
+
     import questionary
 
     print()
@@ -924,6 +1057,7 @@ def run_reset_wizard() -> int:
 
     # Step 2: dry-run
     import os
+
     from context import BicameralContext
     from handlers.reset import handle_reset
 
diff --git a/skills/bicameral-preflight/SKILL.md b/skills/bicameral-preflight/SKILL.md
index 17282cb2..6be030df 100644
--- a/skills/bicameral-preflight/SKILL.md
+++ b/skills/bicameral-preflight/SKILL.md
@@ -59,6 +59,22 @@ If uncertain whether the user will write code, **fire anyway** — the
 handler is gated on actionable signal and will stay silent if nothing
 relevant is found. The cost of a false fire is one silent no-op.
 
+### Hook reinforcement
+
+The trigger described above is reinforced by a `UserPromptSubmit` hook
+configured in [`.claude/settings.json`](../../.claude/settings.json).
+The hook reads the user prompt, runs a deterministic regex over the
+canonical verb list at
+[`scripts/hooks/preflight_intent.py`](../../scripts/hooks/preflight_intent.py),
+and — on match — injects a `<system-reminder>` block elevating
+`bicameral.preflight` above the agent's default tool-selection priority.
+
+For v0 the verb list is intentionally duplicated: the SKILL.md
+`description` field above embeds the list as a string literal so
+Claude Code skill discovery can read it, while the Python module is
+the canonical source for the hook. Both must be edited together to
+evolve the trigger surface; future configurability will deduplicate.
+
 ## Telemetry
 
 > **Guard**: Only call `skill_begin` and `skill_end` if telemetry is enabled. Telemetry is enabled by default; disabled by setting `BICAMERAL_TELEMETRY=0` (or `false`/`off`/`no`). If disabled, skip both calls and omit all `diagnostic` tracking.
@@ -123,10 +139,18 @@ case proceed directly to step 2.
 
 ### 2. Call `bicameral.preflight` for region-anchored and HITL state
 
+**Discover first, then preflight.** Before this call, use Read / Grep / Glob to
+resolve the user's request to concrete file paths. The user often names a
+*feature* ("the reorder feature", "the rate limiter") rather than a *file*; the
+caller LLM is responsible for that mapping — the server does deterministic
+retrieval, not semantic guessing. A topic-only call falls back to fuzzy text
+similarity over decision descriptions; passing `file_paths` engages the
+high-precision `binds_to` graph lookup.
+
 ```
 bicameral.preflight(
   topic="<the 1-line topic>",
-  file_paths=["<repo-relative path>", ...],  # include if you've scoped the files
+  file_paths=["<repo-relative path>", ...],  # discovered in step 1
 )
 ```
 
@@ -144,8 +168,25 @@ those into your in-scope set.
 The response also carries an optional `sync_metrics` field — skip rendering it.
 If `response.product_stage` is non-null, surface it verbatim to the user as a brief note (shown once per device only).
 
-**Omit `file_paths`** if you haven't scoped the files yet (early "how should I
-approach X?" queries). The handler still runs sync and HITL checks.
+**`file_paths` may be omitted only** for genuinely abstract queries with no
+file referent yet (e.g. *"how should I approach building a retry helper?"* —
+no existing files to point at). For implementation prompts that name or imply
+a feature backed by existing code, populate `file_paths` from your discovery.
+The handler still runs sync and HITL checks either way; passing `file_paths`
+just unlocks the precision channel.
+
+The server expands caller-supplied `file_paths` by 1 hop along the
+code-locator graph's **import edges** (file-level structural
+dependency), so a decision bound to `app/src/lib/git/reorder.ts` still
+surfaces when the caller passes the structurally-near
+`app/src/ui/multi-commit-operation/reorder.tsx` (because the latter
+imports the former). You should still pass concrete paths discovered
+in step 1 — the expansion lifts the recall ceiling on near-misses, it
+doesn't replace caller-side discovery. Decisions reached only via the
+expansion carry `confidence=0.7` in the response (vs `0.9` for direct
+pins), and `sources_chained` includes `"graph"` (alongside `"region"`)
+when expansion contributed at least one hit. Callers can therefore
+de-prioritize expanded matches without losing them.
 
 ### 2.5 Resolve pending compliance checks if present
 
@@ -311,6 +352,105 @@ A one-line forward narration helps:
 > from idempotency.ts. I'll flag the event.id deduplication question
 > for you to answer before I commit."
 
+### 5.6 Capture refinements — ask the user, then act mechanically
+
+When preflight surfaced ≥1 decision and the user's request operates on or
+near the same feature surface, **do not judge contradiction yourself.**
+LLM contradiction detection has been observed to silently miss
+structural-mismatch refinements (e.g. user asks for a "programmatic API
+to reorder commits" while a prior decision describes "drag-to-reorder
+UI" — the conflict is real but not lexical, and the agent rationalizes
+"these can coexist"). Per #175, the judgment moves to the user.
+
+#### 5.6.1 Disambiguate via `AskUserQuestion`
+
+This step fires whenever `response.fired == True` and `len(response.decisions) >= 1`,
+regardless of guided mode (capture is the headline product behavior, not
+opt-in). Ask once per surfaced decision the user's request plausibly
+touches; skip for surfaced decisions that are clearly unrelated to the
+prompt domain.
+
+```python
+AskUserQuestion({
+  "question": (
+      "Your request appears to operate on the same feature surface as "
+      "surfaced decision <decision_id> ('<one-line description>'). "
+      "Treat this work as a refinement of that prior plan?"
+  ),
+  "multiSelect": False,
+  "options": [
+    {
+      "label": "Yes — supersede prior plan",
+      "description": "<paraphrase user's direction; replaces the prior decision wholesale>",
+    },
+    {
+      "label": "Yes — keep both (addition or scoping)",
+      "description": "<paraphrase; adds to or narrows the prior decision; both remain>",
+    },
+    {
+      "label": "No — unrelated to prior plan",
+      "description": "Continue without capture",
+    },
+  ],
+})
+```
+
+#### 5.6.2 Mechanical capture (after user disambiguation)
+
+Based on the user's selection, branch:
+
+- **"supersede"** → execute the two-call capture below with `action="supersede"`.
+- **"keep both"** → execute the two-call capture below with `action="keep_both"`.
+- **"unrelated"** → skip capture; proceed to implementation. Narrate one
+  line ("noted — surfaced context isn't applicable here") and move on.
+
+For the two "yes" branches:
+
+1. **Ingest the refinement** with `source=agent_session`, scoped to the
+   same `feature_group` as the surfaced decision:
+
+```
+bicameral.ingest(payload={
+  "query": "<surfaced decision's topic>",
+  "source": "agent_session",
+  "title": "<short label, e.g. 'reorder-programmatic-api'>",
+  "date": "<today ISO date>",
+  "decisions": [{ "description": "<user's direction, stated as a decision>" }]
+}, feature_group="<same feature group as the surfaced decision>")
+```
+
+2. **Wire it to the seeded decision** via `bicameral.resolve_collision`:
+
+```
+bicameral.resolve_collision(
+  new_id="<just-ingested refinement id>",
+  old_id="<surfaced decision id>",
+  action="supersede" | "keep_both" | "link_parent"
+)
+```
+
+`link_parent` is also available: offer it as an extra option at the
+`AskUserQuestion` step when the surfaced decision is an L1 parent and the
+user's direction is an L2 child. It wires `parent_decision_id` with no
+supersede edge and no status change.
+
+The user has answered the disambiguation question, so capture is
+mechanical from this point. PM ratifies in the inbox.
+
+Narrate one line: *"Captured refinement: '<paraphrase>' — wired as
+<action> of <feature> roadmap entry."*
+
+#### Hook reinforcement
+
+A PostToolUse hook scoped to `mcp__bicameral__bicameral_preflight` injects a
+`<system-reminder>` after every preflight call that surfaces ≥1 decision. The
+reminder templates Step 5.6.1's `AskUserQuestion` shape with the surfaced
+`decision_id` + description filled in, so the question fires reliably even
+when the agent's natural inclination would be to skip the disambiguation.
+Source: `scripts/hooks/post_preflight_capture_reminder.py`; wired by
+`setup_wizard._install_claude_hooks` and the e2e harness's
+`materialize_settings_with_hooks`.
+
 ### 6. Honor blocking hints (guided mode vs normal mode)
 
 The agent's `guided_mode` setting controls whether action hints are
diff --git a/telemetry.py b/telemetry.py
index 9c291fac..efe2d72c 100644
--- a/telemetry.py
+++ b/telemetry.py
@@ -42,7 +42,6 @@
 
 import json
 import logging
-import os
 import threading
 import uuid
 from pathlib import Path
@@ -60,6 +59,7 @@ def _is_enabled() -> bool:
     the env-var override (BICAMERAL_TELEMETRY=0) continues to work.
     """
     from consent import telemetry_allowed
+
     return telemetry_allowed()
 
 
@@ -83,6 +83,7 @@ def _send_bg(payload: dict) -> None:
     """POST to the relay in a daemon thread. Never raises."""
     try:
         import urllib.request
+
         data = json.dumps(payload).encode()
         req = urllib.request.Request(
             _RELAY_URL,
@@ -98,7 +99,9 @@ def _send_bg(payload: dict) -> None:
         logger.debug("[telemetry] relay POST failed (non-fatal): %s", exc)
 
 
-def send_event(version: str, diagnostic: dict | None = None, **properties: str | int | float | bool) -> None:
+def send_event(
+    version: str, diagnostic: dict | None = None, **properties: str | int | float | bool
+) -> None:
     """Send a telemetry event. Fire-and-forget. Never raises.
 
     The relay only requires `distinct_id` and `version` — all other kwargs are
@@ -118,6 +121,7 @@ def send_event(version: str, diagnostic: dict | None = None, **properties: str |
     # Privacy-preserving: only the skill/tool name + 1 are written, no payload.
     try:
         from local_counters import increment as _local_increment
+
         skill_name = properties.get("skill") or properties.get("tool")
         if isinstance(skill_name, str):
             _local_increment(skill_name)
diff --git a/tests/_extract_headless.py b/tests/_extract_headless.py
index cc28b5e2..27dc2e8f 100644
--- a/tests/_extract_headless.py
+++ b/tests/_extract_headless.py
@@ -20,6 +20,7 @@
 them with "OAuth authentication is currently not supported" (401).
 Standard API keys (sk-ant-api03...) authenticate via x-api-key.
 """
+
 from __future__ import annotations
 
 import hashlib
@@ -148,7 +149,7 @@ def _extract_step1_excerpt(skill_md: str) -> str:
 
     next_header = _STEP_HEADER_RE.search(body, step1_match.end())
     end = next_header.start() if next_header else len(body)
-    return body[step1_match.start():end].strip()
+    return body[step1_match.start() : end].strip()
 
 
 def _cache_path(skill_sha: str, transcript_sha: str, model: str) -> Path:
diff --git a/tests/_extraction_matcher.py b/tests/_extraction_matcher.py
index 027407cc..94ed7426 100644
--- a/tests/_extraction_matcher.py
+++ b/tests/_extraction_matcher.py
@@ -27,6 +27,7 @@
 - Offline tests use the rapidfuzz fallback in _extraction_metrics.py
   by passing matcher="rapidfuzz" explicitly, so no network is needed.
 """
+
 from __future__ import annotations
 
 import hashlib
@@ -280,9 +281,7 @@ def llm_match(
             "Set the env var, or pass matcher='rapidfuzz' explicitly."
         )
 
-    tool_input = _call_matcher_api(
-        actual, expected, model=chosen_model, api_key=chosen_key
-    )
+    tool_input = _call_matcher_api(actual, expected, model=chosen_model, api_key=chosen_key)
     pairs = _parse_matches(tool_input, n_actual=len(actual), n_expected=len(expected))
 
     if use_cache:
diff --git a/tests/_extraction_metrics.py b/tests/_extraction_metrics.py
index de4346ab..8b1e4be6 100644
--- a/tests/_extraction_metrics.py
+++ b/tests/_extraction_metrics.py
@@ -38,6 +38,7 @@
 fixture-less transcripts don't break CI before the ground-truth set is
 bootstrapped.
 """
+
 from __future__ import annotations
 
 import json
@@ -67,9 +68,7 @@ def _descs(items: list[dict]) -> list[str]:
     return [str(d.get("description", "")).strip() for d in items if d.get("description")]
 
 
-def _rapidfuzz_match(
-    actual: list[str], expected: list[str]
-) -> list[tuple[int, int | None]]:
+def _rapidfuzz_match(actual: list[str], expected: list[str]) -> list[tuple[int, int | None]]:
     """Rapidfuzz 1:1 matching. Returns (actual_idx, expected_idx | None) pairs.
 
     For each actual in order, pick the best remaining expected by
@@ -143,6 +142,7 @@ def compute_extraction_metrics(
         # Import inside the function so offline tests that force
         # matcher="rapidfuzz" don't drag in httpx / network code.
         from _extraction_matcher import llm_match  # type: ignore[import-not-found]
+
         pairs = llm_match(actual, expected)
     elif chosen == "rapidfuzz":
         pairs = _rapidfuzz_match(actual, expected)
diff --git a/tests/bench_drift.py b/tests/bench_drift.py
index e56477fc..6e03cba3 100644
--- a/tests/bench_drift.py
+++ b/tests/bench_drift.py
@@ -108,7 +108,9 @@ async def _collect_real_symbols(adapter, repo_path: Path, n_files_target: int) -
     files: list[Path] = []
     for d in seed_dirs:
         if d.exists():
-            files.extend(sorted(p for p in d.rglob("*.py") if p.is_file() and "__pycache__" not in p.parts))
+            files.extend(
+                sorted(p for p in d.rglob("*.py") if p.is_file() and "__pycache__" not in p.parts)
+            )
 
     collected: list[dict] = []
     seen_pairs: set[str] = set()
@@ -129,11 +131,13 @@ async def _collect_real_symbols(adapter, repo_path: Path, n_files_target: int) -
             if key in seen_pairs:
                 continue
             seen_pairs.add(key)
-            collected.append({
-                "file_path": rel,
-                "symbol_name": sym,
-                "line_number": line,
-            })
+            collected.append(
+                {
+                    "file_path": rel,
+                    "symbol_name": sym,
+                    "line_number": line,
+                }
+            )
     return collected
 
 
@@ -146,26 +150,30 @@ def _build_payload(symbols: list[dict], batch_idx: int, batch_size: int) -> dict
     mappings = []
     for i in range(batch_size):
         sym = symbols[(batch_idx * batch_size + i) % len(symbols)]
-        mappings.append({
-            "span": {
-                "span_id": f"bench-{batch_idx}-{i}",
-                "source_type": "transcript",
-                "text": f"Bench decision {batch_idx}-{i} about {sym['symbol_name']}",
-                "speaker": "bench",
-                "source_ref": f"bench-meeting-{batch_idx}",
-            },
-            "intent": f"Bench decision {batch_idx}-{i}: maintain {sym['symbol_name']} in {sym['file_path']}",
-            "symbols": [sym["symbol_name"]],
-            "code_regions": [{
-                "file_path": sym["file_path"],
-                "symbol": sym["symbol_name"],
-                "type": "function",
-                "start_line": sym["line_number"],
-                "end_line": sym["line_number"] + 20,
-                "purpose": f"bench batch {batch_idx} item {i}",
-            }],
-            "dependency_edges": [],
-        })
+        mappings.append(
+            {
+                "span": {
+                    "span_id": f"bench-{batch_idx}-{i}",
+                    "source_type": "transcript",
+                    "text": f"Bench decision {batch_idx}-{i} about {sym['symbol_name']}",
+                    "speaker": "bench",
+                    "source_ref": f"bench-meeting-{batch_idx}",
+                },
+                "intent": f"Bench decision {batch_idx}-{i}: maintain {sym['symbol_name']} in {sym['file_path']}",
+                "symbols": [sym["symbol_name"]],
+                "code_regions": [
+                    {
+                        "file_path": sym["file_path"],
+                        "symbol": sym["symbol_name"],
+                        "type": "function",
+                        "start_line": sym["line_number"],
+                        "end_line": sym["line_number"] + 20,
+                        "purpose": f"bench batch {batch_idx} item {i}",
+                    }
+                ],
+                "dependency_edges": [],
+            }
+        )
     return {
         "query": f"bench batch {batch_idx}",
         "repo": ".",
@@ -189,12 +197,16 @@ async def _run_bench(ctx) -> None:
     adapter = get_code_locator()
 
     # --- Setup: collect real symbols, ingest 100 decisions in batches of 10 ---
-    symbols = await _collect_real_symbols(adapter, Path(ctx.repo_path), n_files_target=N_FILES_TARGET)
+    symbols = await _collect_real_symbols(
+        adapter, Path(ctx.repo_path), n_files_target=N_FILES_TARGET
+    )
     assert len(symbols) >= 25, f"Only got {len(symbols)} symbols; need >= 25 for realistic bench"
 
     batch_size = 10
     n_batches = N_DECISIONS // batch_size
-    print(f"\n[bench] Ingesting {N_DECISIONS} decisions across {len(symbols)} unique symbols ({n_batches} batches of {batch_size})")
+    print(
+        f"\n[bench] Ingesting {N_DECISIONS} decisions across {len(symbols)} unique symbols ({n_batches} batches of {batch_size})"
+    )
 
     setup_start = time.perf_counter()
     for b in range(n_batches):
@@ -262,11 +274,15 @@ async def _run_bench(ctx) -> None:
     print("DRIFT BENCHMARK BASELINE — V1 A1")
     print("=" * 68)
     print(f"Setup: {N_DECISIONS} decisions, {len(symbols)} symbols, {len(file_paths)} files")
-    print(f"Setup ingest: {setup_elapsed:.2f}s total ({setup_elapsed/N_DECISIONS*1000:.1f}ms / decision)")
+    print(
+        f"Setup ingest: {setup_elapsed:.2f}s total ({setup_elapsed / N_DECISIONS * 1000:.1f}ms / decision)"
+    )
     print()
     print(f"{'handler':<25} {'p50 (ms)':>10} {'p95 (ms)':>10} {'max (ms)':>10} {'n':>5}")
     print("-" * 68)
     for name, p in report["handlers"].items():
-        print(f"{name:<25} {p['p50']*1000:>10.1f} {p['p95']*1000:>10.1f} {p['max']*1000:>10.1f} {p['n']:>5}")
+        print(
+            f"{name:<25} {p['p50'] * 1000:>10.1f} {p['p95'] * 1000:>10.1f} {p['max'] * 1000:>10.1f} {p['n']:>5}"
+        )
     print("=" * 68)
     print(f"Artifact: {out_path}")
diff --git a/tests/conftest.py b/tests/conftest.py
index 46856c4f..4042b11f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -26,10 +26,7 @@ def _isolate_consent_state(tmp_path_factory):
     third-party fixture plugin.
     """
     home = tmp_path_factory.mktemp("bicameral_home")
-    saved = {
-        k: os.environ.get(k)
-        for k in ("HOME", "USERPROFILE", "BICAMERAL_SKIP_CONSENT_NOTICE")
-    }
+    saved = {k: os.environ.get(k) for k in ("HOME", "USERPROFILE", "BICAMERAL_SKIP_CONSENT_NOTICE")}
     os.environ["HOME"] = str(home)
     os.environ["USERPROFILE"] = str(home)
     os.environ["BICAMERAL_SKIP_CONSENT_NOTICE"] = "1"
@@ -48,7 +45,9 @@ def pytest_configure(config):
     config.addinivalue_line("markers", "phase2: requires SurrealDBLedgerAdapter + SurrealDB")
     config.addinivalue_line("markers", "phase3: full E2E — requires both Phase 1 + Phase 2")
     config.addinivalue_line("markers", "alpha_flow: Jacob North Star regression suite — v0.7 gate")
-    config.addinivalue_line("markers", "bench: drift benchmark harness (V1 A1) — skipped by default, run with -m bench")
+    config.addinivalue_line(
+        "markers", "bench: drift benchmark harness (V1 A1) — skipped by default, run with -m bench"
+    )
 
 
 @pytest.fixture(autouse=True)
@@ -69,6 +68,7 @@ def _default_authoritative_ref_to_current_branch(monkeypatch):
     the start of the test, which unsets this default for that test only.
     """
     import subprocess
+
     try:
         result = subprocess.run(
             ["git", "rev-parse", "--abbrev-ref", "HEAD"],
@@ -84,11 +84,12 @@ def _default_authoritative_ref_to_current_branch(monkeypatch):
         monkeypatch.setenv("BICAMERAL_AUTHORITATIVE_REF", current_branch)
 
 
-
 @pytest.fixture
 def repo_path() -> str:
     """Repo root. Defaults to the MCP repo itself for Phase 1+ tests."""
-    return os.getenv("REPO_PATH", str(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))))
+    return os.getenv(
+        "REPO_PATH", str(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+    )
 
 
 @pytest.fixture
@@ -100,6 +101,7 @@ def surreal_url() -> str:
 def ctx():
     """Build a BicameralContext from current env (SURREAL_URL, REPO_PATH)."""
     from context import BicameralContext
+
     return BicameralContext.from_env()
 
 
diff --git a/tests/e2e/README.md b/tests/e2e/README.md
new file mode 100644
index 00000000..204fd333
--- /dev/null
+++ b/tests/e2e/README.md
@@ -0,0 +1,104 @@
+# v0 user flow e2e
+
+End-to-end validation of `BicameralAI/bicameral#108`'s six canonical user
+flows, driven by **real Claude Code CLI sessions** with `bicameral-mcp`
+registered as an MCP server. Test fixture: a pinned commit of
+`github.com/desktop/desktop`, with `docs/process/roadmap.md` as ingest
+content.
+
+This is the canonical CI test for the spec. The handler-replay simulation
+at `scripts/sim_issue_108_flows.py` complements it for fast local iteration
+on handler logic without burning Claude API calls.
+
+## What it tests
+
+Each flow corresponds to a section of [bicameral#108 spec](https://github.com/BicameralAI/bicameral/issues/108):
+
+| Flow | Spec section | Asserts |
+|---|---|---|
+| 1 | Record decisions from a meeting | `bicameral.ingest` called with mappings |
+| 2 | Begin to write code (preflight) | `bicameral.preflight` called with `file_paths` |
+| 3 | Commit code → reflected | `bicameral.link_commit` + `bicameral.resolve_compliance` (with verdicts) |
+| 4 | End coding session | `bicameral.ingest` called with `source="agent_session"` |
+| 5 | Review what's been tracked | `bicameral.history` called (with seed ingest + ratify) |
+
+Each flow is a separate `claude -p` invocation with a fresh `memory://`
+ledger. Within a session, prompts may chain multiple tool calls — the
+asserter walks the entire stream-json transcript.
+
+## How it works
+
+```
+prompts/flow-N-*.md  →  claude -p  →  stream-json transcript  →  assert
+                          │
+                          ├─ --mcp-config bicameral.mcp.json  (registers bicameral-mcp)
+                          ├─ --strict-mcp-config              (no other MCP servers loaded)
+                          ├─ --allowed-tools mcp__bicameral Read Grep
+                          ├─ --add-dir <desktop_clone>        (skill Read access)
+                          └─ --output-format stream-json --verbose
+```
+
+`run_e2e_flows.py` orchestrates all five flows, captures transcripts to
+`test-results/e2e/flow-N.ndjson`, and asserts on the tool-use blocks.
+
+## Running locally
+
+```bash
+# 1. Install bicameral-mcp + Claude Code CLI
+cd pilot/mcp
+pip install -e ".[test]"
+npm install -g @anthropic-ai/claude-code
+
+# 2. Authenticate Claude Code CLI (interactive — once)
+claude auth
+
+# 3. Clone the test fixture
+git clone --depth=1 https://github.com/desktop/desktop /tmp/desktop-clone
+cd /tmp/desktop-clone && git checkout -b main && cd -
+
+# 4. Run all five flows
+DESKTOP_REPO_PATH=/tmp/desktop-clone python tests/e2e/run_e2e_flows.py
+```
+
+Cost per run: ~$0.50–$2.00 across all five flows, depending on how much
+work the LLM does in each session. Each flow is capped by
+`--max-budget-usd 2.0`.
+
+## CI
+
+GitHub Actions workflow: `.github/workflows/v0-user-flow-e2e.yml`.
+
+- Triggers on PRs touching `tests/e2e/**`, `handlers/**`, `ledger/**`,
+  `contracts.py`, `skills/bicameral-*/**`, or the workflow itself.
+- Runs in the `production` GitHub environment for `CLAUDE_CODE_OAUTH_TOKEN`.
+- Pinned `desktop/desktop` commit in the workflow file (update by editing
+  the env var).
+- Uploads `test-results/e2e/*.ndjson` as job artifacts (30-day retention)
+  for failure forensics.
+
+## Updating
+
+When the spec changes, update both:
+
+1. The relevant `prompts/flow-N-*.md` (natural-language user prompt)
+2. The matching `assert_flow_N` in `run_e2e_flows.py`
+
+When `desktop/desktop`'s `roadmap.md` or `cherry-pick.ts` shape drifts in
+ways that break the prompts or bind targets, bump the pinned commit in
+the workflow + adjust prompts.
+
+## Why not handler-replay only?
+
+The handler-replay sim (`scripts/sim_issue_108_flows.py`) directly imports
+handler functions and calls them. It's fast and useful for iterating on
+handler logic, but it bypasses three layers we need to validate:
+
+- **MCP protocol** — JSON-RPC over stdio, tool schema marshalling
+- **Skill files** — `.claude/skills/bicameral-*/SKILL.md` parsing, trigger
+  matching, prompt construction
+- **Caller LLM** — natural-language → tool-call sequencing, auto-chains
+  (preflight → capture-corrections → context-sentry → ingest → judge_gaps)
+
+This e2e suite covers all three. Together, the replay sim and this suite
+form the spec's two-level validation: handler invariants (replay sim) +
+user-experience contract (this directory).
diff --git a/tests/e2e/_harness_setup.py b/tests/e2e/_harness_setup.py
new file mode 100644
index 00000000..036358f8
--- /dev/null
+++ b/tests/e2e/_harness_setup.py
@@ -0,0 +1,246 @@
+"""Shared test-harness setup helpers.
+
+Used by:
+  - tests/e2e/run_e2e_flows.py (headless ``claude -p`` assertion test)
+  - tests/e2e/record_demo_interactive.sh (interactive tmux-driven recording)
+
+Both code paths must produce IDENTICAL artifacts (materialized MCP config,
+materialized claude settings with hooks, bootstrapped ``.bicameral/``) so the
+agent sees the same hook substrate and same MCP config regardless of which
+entry point invoked it. This module is the single source of truth for that
+materialization — no inline duplication in either consumer.
+
+A CLI entry point exists so shell scripts can invoke the same logic as the
+Python harness without re-implementing it inline. See ``__main__``.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import pathlib
+import shutil
+import subprocess
+import sys
+
+
+def materialize_mcp_config(
+    template: pathlib.Path,
+    out_dir: pathlib.Path,
+    desktop_repo_path: str,
+    ledger_dir: pathlib.Path,
+) -> pathlib.Path:
+    """Fill the MCP config template's ``${DESKTOP_REPO_PATH}`` and
+    ``${LEDGER_DIR}`` placeholders and write the result to
+    ``<out_dir>/bicameral.mcp.materialized.json`` (the returned path).
+
+    Values are JSON-escaped before substitution because the placeholders sit
+    inside JSON string literals: a raw backslash or double quote in a path
+    would otherwise corrupt the config. Passing REPO_PATH via the config also
+    sidesteps Claude Code's implementation-defined env replace-vs-merge.
+    """
+    raw = template.read_text(encoding="utf-8")
+    subs = {"${DESKTOP_REPO_PATH}": desktop_repo_path, "${LEDGER_DIR}": str(ledger_dir)}
+    for placeholder, value in subs.items():
+        raw = raw.replace(placeholder, json.dumps(value, ensure_ascii=False)[1:-1])
+    out = out_dir / "bicameral.mcp.materialized.json"
+    out.write_text(raw, encoding="utf-8")
+    return out
+
+
+def materialize_settings_with_hooks(
+    out_dir: pathlib.Path,
+    mcp_config_path: pathlib.Path,
+    mcp_root: pathlib.Path,
+) -> pathlib.Path:
+    """Write ``<out_dir>/claude-settings-with-hook.json`` and return its path.
+
+    The file is a project-style Claude ``settings.json`` carrying the four
+    hooks bicameral's setup-wizard installs in real projects. The PostToolUse
+    and UserPromptSubmit command strings are imported verbatim from
+    ``setup_wizard`` (``mcp_root`` is put on ``sys.path`` so that import
+    resolves), which keeps the harness from drifting away from the wizard.
+
+    Parameters:
+      - ``out_dir``: existing directory that receives the settings file.
+      - ``mcp_config_path``: materialized MCP config handed to the builder of
+        the SessionEnd command.
+      - ``mcp_root``: directory from which ``setup_wizard`` is imported.
+
+    Hooks written:
+      - PostToolUse / ``Bash``: post-commit command; bicameral-sync listens
+        for its "new commit detected" output to auto-fire ``link_commit``.
+      - PostToolUse / preflight tool: collision-capture reminder, fired when
+        preflight surfaces at least one decision, so the agent captures user
+        refinements that contradict surfaced decisions.
+      - SessionEnd: spawns a subprocess running
+        ``/bicameral:capture-corrections --auto-ingest`` to scan the
+        just-ended session for uningested mid-session corrections.
+      - UserPromptSubmit: preflight reminder that elevates
+        ``bicameral.preflight`` on code-implementation prompts.
+
+    The SessionEnd command comes from
+    ``setup_wizard._build_session_end_command(mcp_config_path=...)``. End
+    users have ``bicameral`` in their default Claude Code MCP config, so
+    their subprocess needs no flag; the harness redirects ``SURREAL_URL`` to
+    a test-results ledger via the materialized config, and unless that
+    config is passed on, the subprocess writes to the user's default ledger
+    and post-hoc validators find zero rows.
+    """
+    # Make ``setup_wizard`` importable from ``mcp_root``.
+    root = str(mcp_root)
+    if root not in sys.path:
+        sys.path.insert(0, root)
+    from setup_wizard import (  # noqa: E402
+        _BICAMERAL_COLLISION_CAPTURE_REMINDER_COMMAND,
+        _BICAMERAL_POST_COMMIT_COMMAND,
+        _BICAMERAL_PREFLIGHT_REMINDER_COMMAND,
+        _BICAMERAL_PREFLIGHT_TOOL_NAME,
+        _build_session_end_command,
+    )
+
+    def _cmd(command: str) -> dict:
+        """Wrap a shell command string as one ``type: command`` hook entry."""
+        return {"type": "command", "command": command}
+
+    # Built with the harness MCP config so the subprocess uses the test ledger.
+    session_end_command = _build_session_end_command(mcp_config_path=str(mcp_config_path))
+
+    hooks = {
+        "PostToolUse": [
+            {
+                "matcher": "Bash",
+                "hooks": [_cmd(_BICAMERAL_POST_COMMIT_COMMAND)],
+            },
+            {
+                "matcher": _BICAMERAL_PREFLIGHT_TOOL_NAME,
+                "hooks": [_cmd(_BICAMERAL_COLLISION_CAPTURE_REMINDER_COMMAND)],
+            },
+        ],
+        "SessionEnd": [
+            {"hooks": [_cmd(session_end_command)]},
+        ],
+        "UserPromptSubmit": [
+            {"hooks": [_cmd(_BICAMERAL_PREFLIGHT_REMINDER_COMMAND)]},
+        ],
+    }
+
+    target = out_dir / "claude-settings-with-hook.json"
+    target.write_text(json.dumps({"hooks": hooks}, indent=2), encoding="utf-8")
+    return target
+
+
+def clean_ledger(ledger_dir: pathlib.Path) -> None:
+    """Delete the on-disk ledger so a new harness run starts from empty state.
+
+    The ledger is shared by the sequential claude sessions inside one run but
+    must not leak into the next run; a missing path is a no-op.
+    """
+    if not ledger_dir.exists():
+        return
+    shutil.rmtree(ledger_dir, ignore_errors=True)
+
+
+def reset_desktop_repo(desktop_repo_path: str) -> None:
+    """Best-effort ``git reset --hard`` of the desktop clone between runs:
+    ``FETCH_HEAD`` is tried first, then ``HEAD``; the first success ends the
+    loop. Flow 3 commits for real, so later runs would otherwise start dirty.
+    """
+    repo = pathlib.Path(desktop_repo_path)
+    refs = ("FETCH_HEAD", "HEAD") if (repo / ".git").exists() else ()
+    for ref in refs:
+        cmd = ("git", "reset", "--hard", ref)
+        try:
+            subprocess.run(cmd, cwd=repo, check=True, capture_output=True, timeout=20)
+        except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
+            continue
+        return
+
+
+def bootstrap_bicameral_dir(desktop_repo_path: str, mcp_root: pathlib.Path) -> None:
+    """Recreate ``<desktop_repo_path>/.bicameral`` from scratch.
+
+    Any existing ``.bicameral`` directory is removed first (deletion errors
+    are ignored) so flows never inherit state from an earlier run. Then
+    ``setup_wizard._write_collaboration_config`` is called with
+    ``mode="solo"``, ``guided=False``, ``telemetry=False`` to write the same
+    minimal config a fresh end-user install gets (single source of truth).
+
+    Why it matters: the SessionEnd hook is guarded by ``[ -d .bicameral ]``.
+    If the directory is missing when the parent claude session exits, the
+    hook short-circuits silently and Flow 4's ledger validation has nothing
+    to observe.
+    """
+    root = str(mcp_root)
+    if root not in sys.path:
+        sys.path.insert(0, root)
+    from setup_wizard import _write_collaboration_config  # noqa: E402
+
+    data_path = pathlib.Path(desktop_repo_path)
+    stale = data_path / ".bicameral"
+    if stale.exists():
+        shutil.rmtree(stale, ignore_errors=True)
+    config_kwargs = {"mode": "solo", "guided": False, "telemetry": False}
+    _write_collaboration_config(data_path=data_path, **config_kwargs)
+
+
+def setup_all(
+    desktop_repo_path: str,
+    results_dir: pathlib.Path,
+    mcp_config_template: pathlib.Path,
+    mcp_root: pathlib.Path,
+    clean: bool = True,
+) -> dict[str, pathlib.Path]:
+    """Run the full harness setup in canonical order and return its artifacts.
+
+    Order: create ``results_dir``; when ``clean`` is true (the default), wipe
+    the ledger and reset the desktop repo; bootstrap ``.bicameral``;
+    materialize the MCP config; materialize the hook settings.
+
+    Returns a dict with keys ``mcp_config``, ``settings`` and ``ledger``
+    (``<results_dir>/ledger.db``) for wiring into the agent invocation.
+    """
+    results_dir.mkdir(parents=True, exist_ok=True)
+    ledger = results_dir / "ledger.db"
+    if clean:
+        clean_ledger(ledger)
+        reset_desktop_repo(desktop_repo_path)
+    bootstrap_bicameral_dir(desktop_repo_path, mcp_root)
+    mcp_cfg = materialize_mcp_config(mcp_config_template, results_dir, desktop_repo_path, ledger)
+    hooks_cfg = materialize_settings_with_hooks(results_dir, mcp_cfg, mcp_root)
+    return {"mcp_config": mcp_cfg, "settings": hooks_cfg, "ledger": ledger}
+
+
+def main() -> int:
+    """Command-line wrapper around ``setup_all`` for shell callers such as
+    record_demo_interactive.sh.
+
+    All four path flags are required; ``--no-clean`` skips the ledger wipe and
+    desktop-clone reset. Each artifact is printed to stdout as a
+    ``<key>\\t<path>`` line so a shell can split it with ``awk`` or ``cut``.
+    Returns 0 as the process exit status.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    for flag in ("--desktop-repo-path", "--results-dir", "--mcp-config-template", "--mcp-root"):
+        parser.add_argument(flag, required=True)
+    parser.add_argument(
+        "--no-clean",
+        action="store_true",
+        help="skip ledger wipe + desktop-clone reset (default: wipe + reset)",
+    )
+    opts = parser.parse_args()
+
+    artifacts = setup_all(
+        desktop_repo_path=opts.desktop_repo_path,
+        results_dir=pathlib.Path(opts.results_dir),
+        mcp_config_template=pathlib.Path(opts.mcp_config_template),
+        mcp_root=pathlib.Path(opts.mcp_root),
+        clean=not opts.no_clean,
+    )
+    for key, path in artifacts.items():
+        print(f"{key}\t{path}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/e2e/_ledger_helpers.py b/tests/e2e/_ledger_helpers.py
new file mode 100644
index 00000000..8d4be1bf
--- /dev/null
+++ b/tests/e2e/_ledger_helpers.py
@@ -0,0 +1,24 @@
+"""Pure helpers for ledger-based flow validation.
+
+Extracted from run_e2e_flows.py so unit tests can import without
+triggering the harness's top-level env-var / CLI-presence guards.
+"""
+
+from __future__ import annotations
+
+
+def count_agent_session_decisions(snapshot: dict) -> int | None:
+    """Tally ledger decisions whose ``source_type`` is ``"agent_session"``.
+
+    Returns ``None`` when ``snapshot`` has an ``"error"`` key (ledger not
+    queryable; callers treat that as INCONCLUSIVE, not FAIL) and 0 when no
+    row matches. In-session capture (path-A) and the SessionEnd subprocess
+    (path-B) both write this tag, so the count does not tell them apart.
+    """
+    if "error" in snapshot:
+        return None
+    tally = 0
+    for row in snapshot.get("decisions") or []:
+        if row.get("source_type") == "agent_session":
+            tally += 1
+    return tally
diff --git a/tests/e2e/bicameral.mcp.json b/tests/e2e/bicameral.mcp.json
new file mode 100644
index 00000000..e08b1508
--- /dev/null
+++ b/tests/e2e/bicameral.mcp.json
@@ -0,0 +1,12 @@
+{
+  "mcpServers": {
+    "bicameral": {
+      "command": "bicameral-mcp",
+      "args": [],
+      "env": {
+        "SURREAL_URL": "surrealkv://${LEDGER_DIR}",
+        "REPO_PATH": "${DESKTOP_REPO_PATH}"
+      }
+    }
+  }
+}
diff --git a/tests/e2e/demo_renderer.py b/tests/e2e/demo_renderer.py
new file mode 100755
index 00000000..18936668
--- /dev/null
+++ b/tests/e2e/demo_renderer.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+# Pretty-print Claude Code stream-json to xterm and detect scene boundaries.
+#
+# Reads stream-json from stdin (one JSON object per line). Writes:
+#   - human-readable output to stdout (visible in the recorded xterm)
+#   - raw stream-json to $DEMO_TRANSCRIPT
+#   - scene-boundary timestamps to $DEMO_SCENES_FILE
+#
+# Scene boundaries (option a — tool-call ordering, no LLM-emitted sentinels):
+#   t1 (Scene 1 → Scene 2): first mcp__bicameral__bicameral_preflight call
+#   t2 (Scene 2 → Scene 3): first mcp__bicameral__bicameral_history call
+#                           AFTER any mcp__bicameral__bicameral_link_commit call
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+import time
+from pathlib import Path
+
+TRANSCRIPT = Path(os.environ.get("DEMO_TRANSCRIPT", "/tmp/demo-transcript.ndjson"))
+SCENES_FILE = Path(os.environ.get("DEMO_SCENES_FILE", "/tmp/demo-scenes.txt"))
+
+
+def _record_scene(name: str) -> None:  # append one "<name>=<epoch>" marker line
+    with SCENES_FILE.open("a") as f:
+        f.write(f"{name}={time.time():.3f}\n")  # wall-clock epoch seconds, 3 decimals
+
+
+def _tool_bare(name: str) -> str:  # "mcp__server__tool" -> "tool"; names without "__" pass through
+    return name.split("__")[-1]
+
+
+def _input_summary(payload: dict) -> str:
+    """Render up to the first three ``key=value`` pairs of a tool input on one
+    line, clipping each stringified value longer than 60 chars to 57 + "...".
+    Returns "" for an empty or non-dict payload.
+    """
+    if not (isinstance(payload, dict) and payload):
+        return ""
+    rendered = [str(v) for v in list(payload.values())[:3]]
+    clipped = [s if len(s) <= 60 else s[:57] + "..." for s in rendered]
+    return " ".join(f"{k}={s}" for k, s in zip(payload, clipped))
+
+
+def _flush(line: str = "") -> None:
+    """Write ``line`` plus a newline to stdout and flush right away."""
+    print(line + "\n", end="", flush=True)
+
+
+def main() -> int:
+    """Render Claude stream-json from stdin; persist transcript + scene marks.
+
+    Every non-blank line is appended verbatim to TRANSCRIPT; lines that parse
+    to a JSON object are also rendered to stdout and scanned for the tool
+    calls that define scene boundaries. Returns 0.
+    """
+    SCENES_FILE.write_text("")
+    TRANSCRIPT.write_text("")
+    _record_scene("recording_start")
+
+    saw_link_commit = saw_preflight = saw_post_history = False
+
+    with TRANSCRIPT.open("a") as raw:  # closed even if rendering raises
+        for line in sys.stdin:
+            if not line.strip():
+                continue
+            raw.write(line)  # persist first: transcript keeps unparseable lines
+            raw.flush()
+
+            try:
+                obj = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+            if not isinstance(obj, dict):  # valid JSON, but not an event object
+                continue
+            t = obj.get("type")
+
+            if t == "system" and obj.get("subtype") == "init":
+                _flush(f"[demo] session started — model={obj.get('model', '?')}")
+                continue
+
+            if t == "assistant":
+                msg = obj.get("message") or {}
+                for block in msg.get("content") or []:
+                    if not isinstance(block, dict):  # same guard as the user branch
+                        continue
+                    btype = block.get("type")
+                    if btype == "text":
+                        text = (block.get("text") or "").rstrip()
+                        if text:
+                            _flush()
+                            _flush(text)
+                    elif btype == "tool_use":
+                        name = block.get("name") or ""
+                        bare = _tool_bare(name)
+                        summary = _input_summary(block.get("input") or {})
+                        _flush(f"\n  ▸ tool: {bare}  {summary}".rstrip())
+                        if not saw_preflight and name.endswith("bicameral_preflight"):
+                            saw_preflight = True
+                            _record_scene("scene_1_to_2")
+                        if name.endswith("bicameral_link_commit"):
+                            saw_link_commit = True
+                        is_history = name.endswith("bicameral_history")
+                        if is_history and saw_link_commit and not saw_post_history:
+                            saw_post_history = True
+                            _record_scene("scene_2_to_3")
+                continue
+
+            if t == "user":
+                msg = obj.get("message") or {}
+                for block in msg.get("content") or []:
+                    if isinstance(block, dict) and block.get("type") == "tool_result":
+                        content = block.get("content") or ""
+                        if isinstance(content, list):
+                            content = "".join(
+                                part.get("text", "") if isinstance(part, dict) else str(part)
+                                for part in content
+                            )
+                        snippet = str(content).replace("\n", " ")
+                        if len(snippet) > 220:
+                            snippet = snippet[:217] + "..."
+                        _flush(f"  ◂ result: {snippet}")
+                continue
+
+            if t == "result":
+                duration = obj.get("duration_ms", "?")
+                cost = obj.get("total_cost_usd", "?")
+                _flush(f"\n[demo] session complete — duration={duration}ms cost=${cost}")
+
+    _record_scene("recording_end")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/e2e/prompts/composite-demo.md b/tests/e2e/prompts/composite-demo.md
new file mode 100644
index 00000000..55d38d05
--- /dev/null
+++ b/tests/e2e/prompts/composite-demo.md
@@ -0,0 +1,85 @@
+# Composite v0 user-flow demo (single session, three scenes)
+
+This is a continuous demo session that will be split in post into a "PM
+view" video (pm.mp4) and a "Dev view" video (dev.mp4). Walk through
+the three scenes below in order. Do not skip steps. Do not abbreviate.
+
+Before you begin: call `bicameral.dashboard` so the dashboard sidecar
+binds and the right pane of the recording has live ledger updates to
+show.
+
+---
+
+## SCENE 1 — Post-meeting (PM persona)
+
+You are the PM. The team just reviewed the GitHub Desktop roadmap.
+Ingest the following decisions into the ledger via `bicameral.ingest`:
+
+1. **High signal notifications (versions 2.9.10 and 3.0.0)** — Receive
+   a notification when checks fail. Receive a notification when your
+   pull request is reviewed.
+2. **Improved commit history (version 2.9.0)** — Reorder commits via
+   drag/drop. Squash commits via drag/drop. Amend last commit. Create
+   a branch from a previous commit.
+3. **Cherry-picking commits from one branch to another (version 2.7.1)**
+   — Cherry-pick commits with a context menu and interactively. Bind
+   this decision to `app/src/lib/git/cherry-pick.ts` (specifically the
+   `CherryPickResult` enum near the top of the file).
+
+Source: `desktop/desktop:docs/process/roadmap.md`.
+
+After `bicameral.ingest` returns, ratify the decisions you just
+ingested via `bicameral.ratify`. Briefly confirm what landed (decision
+IDs and signoff state) so the viewer understands the ledger now has
+proposed-then-ratified entries.
+
+---
+
+## SCENE 2 — Implementation (Dev persona)
+
+You are now the dev. Walk through the implementation arc end-to-end:
+
+1. Call `bicameral.preflight` on `app/src/lib/git/cherry-pick.ts` to
+   surface relevant decisions before editing. Read the response — it
+   should remind you about the cherry-pick decision from Scene 1.
+
+2. Use the `Edit` tool to add a single-line comment near the top of
+   `app/src/lib/git/cherry-pick.ts` referencing the cherry-pick
+   roadmap decision (e.g.,
+   `// Cherry-pick: roadmap v2.7.1 — context menu + interactive`).
+   Keep it minimal and non-disruptive.
+
+3. Stage and commit the change with `Bash`:
+   - `git add app/src/lib/git/cherry-pick.ts`
+   - `git commit -m "demo: annotate CherryPickResult with roadmap decision"`
+
+4. Call `bicameral.link_commit` on `HEAD` to detect drift against any
+   decisions bound to that file.
+
+5. For each pending compliance check that `link_commit` surfaces, call
+   `bicameral.resolve_compliance` with a verdict
+   (compliant / drifted / not_relevant). Use the file's content as
+   evidence.
+
+6. If any non-trivial decisions emerged mid-session (corrections,
+   constraint clarifications), capture them with `bicameral.ingest`
+   using `source=agent_session`.
+
+---
+
+## SCENE 3 — Post-implementation (PM persona)
+
+You are the PM again. The dev just landed their changes. Show how
+the ledger evolved:
+
+1. Call `bicameral.history`. The cherry-pick decision should now show
+   `status=reflected` (or `compliant`) where it was previously
+   pending or ungrounded.
+
+2. Render a brief markdown table grouped by feature area, showing each
+   decision's two axes — code-compliance status and human signoff
+   state — so the viewer can scan it.
+
+3. Ratify the post-implementation state of the cherry-pick decision
+   via `bicameral.ratify` to acknowledge that what shipped matches
+   what was decided.
diff --git a/tests/e2e/prompts/flow-1-ingest.md b/tests/e2e/prompts/flow-1-ingest.md
new file mode 100644
index 00000000..1d517167
--- /dev/null
+++ b/tests/e2e/prompts/flow-1-ingest.md
@@ -0,0 +1,9 @@
+Just out of roadmap review. Three things we agreed to track:
+
+- High-signal notifications (2.9.10 / 3.0.0): notify on failed checks, notify on PR review.
+- Improved commit history (2.9.0): drag-to-reorder, drag-to-squash, amend last commit, branch from a previous commit.
+- Cherry-pick between branches (2.7.1): context-menu and an interactive variant.
+
+Source: desktop/desktop:docs/process/roadmap.md.
+
+Already aligned with the team — please log these and sign them off on our end. If any have an obvious code home, bind them too so we can catch drift later.
diff --git a/tests/e2e/prompts/flow-2-preflight.md b/tests/e2e/prompts/flow-2-preflight.md
new file mode 100644
index 00000000..052957fd
--- /dev/null
+++ b/tests/e2e/prompts/flow-2-preflight.md
@@ -0,0 +1,3 @@
+Add a programmatic API for reordering commits — it takes an ordered list of commit SHAs and rewrites the branch history to match that order. Wire it so any UI surface can call it with a sorted list and apply the new order.
+
+I'll handle the call-site cleanup separately.
diff --git a/tests/e2e/prompts/flow-3-commit-sync.md b/tests/e2e/prompts/flow-3-commit-sync.md
new file mode 100644
index 00000000..742bc981
--- /dev/null
+++ b/tests/e2e/prompts/flow-3-commit-sync.md
@@ -0,0 +1,3 @@
+Need a quick docs commit. Drop a one-line comment above the CherryPickResult enum in cherry-pick.ts pointing back to the roadmap — something like `// Cherry-pick: roadmap v2.7.1 — context menu + interactive`.
+
+Stage and commit it as `docs: annotate cherry-pick origin`.
diff --git a/tests/e2e/prompts/flow-4-session-end.md b/tests/e2e/prompts/flow-4-session-end.md
new file mode 100644
index 00000000..8bee57e8
--- /dev/null
+++ b/tests/e2e/prompts/flow-4-session-end.md
@@ -0,0 +1,5 @@
+hmm wait — small thing before we keep going on reorder.
+
+just realized: the cherry-pick conflict handler shouldn't ever fall back to a stdin prompt. visual conflict UI is the only resolution path, full stop. if it drifts toward a terminal prompt that's a rollback.
+
+ok back to reorder.ts — keep going on the `reorder()` function for the text-editor flow.
diff --git a/tests/e2e/prompts/flow-5-history.md b/tests/e2e/prompts/flow-5-history.md
new file mode 100644
index 00000000..1f08b9e3
--- /dev/null
+++ b/tests/e2e/prompts/flow-5-history.md
@@ -0,0 +1,3 @@
+Doing a Friday review across all the things we're tracking. Walk me through them grouped by feature — for each one, where it stands on the implementation side and whether it's been signed off.
+
+Anything still on the to-do pile that hasn't moved — flag those, give me a one-sentence read on each, and pick whichever one looks most ready (clear scope, supporting context, no open questions) and sign it off. Then show me the updated view.
diff --git a/tests/e2e/record_demo.sh b/tests/e2e/record_demo.sh
new file mode 100755
index 00000000..a01a5281
--- /dev/null
+++ b/tests/e2e/record_demo.sh
@@ -0,0 +1,288 @@
+#!/usr/bin/env bash
+# Record a single continuous split-screen demo session of the v0 user flow,
+# then post-split the recording into pm.mp4 (PM persona) and dev.mp4
+# (Dev persona). pm.mp4 has a transition slide between the
+# pre-implementation and post-implementation chapters.
+#
+# Layout (1920x1080):
+#   ┌──────────────────────────┬──────────────────────────┐
+#   │  xterm                   │  chromium                │
+#   │  claude -p <composite>   │  http://localhost:<port> │
+#   │  (one continuous session │  bicameral dashboard     │
+#   │  spanning all 3 scenes)  │  (live SSE updates)      │
+#   └──────────────────────────┴──────────────────────────┘
+#
+# Single claude session = single MCP process = single in-memory ledger.
+# That's what makes Scene 3 (PM post-impl) authentically reflect Scene 2's
+# (Dev) commits — the dashboard SSE keeps state across the whole arc.
+#
+# This script runs only in the GitHub workflow's optional manual-dispatch
+# path (`record_demo=true`). It is `continue-on-error` at the workflow
+# level — a flake here never gates merge.
+
+set -euo pipefail
+
+# ── Config ──────────────────────────────────────────────────────────────
+DISPLAY_NUM=99
+RES_W=1920
+RES_H=1080
+HALF_W=$((RES_W / 2))
+RES="${RES_W}x${RES_H}"
+FRAMERATE=10
+TRANSITION_DURATION=4
+
+E2E_DIR="$(cd "$(dirname "$0")" && pwd)"
+MCP_DIR="$(cd "$E2E_DIR/../.." && pwd)"
+OUT_DIR="$MCP_DIR/docs/demos/v0-userflow-e2e"
+RESULTS_DIR="$MCP_DIR/test-results/e2e"
+MCP_CONFIG_TEMPLATE="$E2E_DIR/bicameral.mcp.json"
+MCP_CONFIG_MATERIALIZED="$RESULTS_DIR/bicameral.mcp.materialized.json"
+PORT_FILE="$HOME/.bicameral/dashboard.port"
+COMPOSITE_PROMPT_FILE="$E2E_DIR/prompts/composite-demo.md"
+DEMO_RENDERER="$E2E_DIR/demo_renderer.py"
+
+DESKTOP_REPO_PATH="${DESKTOP_REPO_PATH:-/tmp/desktop-clone}"
+
+mkdir -p "$OUT_DIR" "$RESULTS_DIR" "$(dirname "$PORT_FILE")"
+
+if [ ! -d "$DESKTOP_REPO_PATH" ]; then
+  echo "ERROR: DESKTOP_REPO_PATH=$DESKTOP_REPO_PATH does not exist." >&2
+  exit 2
+fi
+
+for bin in Xvfb fluxbox xterm ffmpeg claude bicameral-mcp python3; do
+  if ! command -v "$bin" >/dev/null 2>&1; then
+    echo "ERROR: required binary '$bin' not found on PATH." >&2
+    exit 2
+  fi
+done
+
+# Pick whichever chromium-compatible browser is available. GitHub's
+# ubuntu-latest runners ship google-chrome-stable; Linux desktops often
+# have chromium via snap. All four accept the same Chromium-style flags.
+CHROME_BIN="$(command -v google-chrome-stable \
+  || command -v google-chrome \
+  || command -v chromium \
+  || command -v chromium-browser \
+  || true)"
+if [ -z "$CHROME_BIN" ]; then
+  echo "ERROR: no chromium-compatible browser found on PATH." >&2
+  echo "  tried: google-chrome-stable, google-chrome, chromium, chromium-browser" >&2
+  exit 2
+fi
+echo "[demo] using browser: $CHROME_BIN"
+
+# ── Materialize MCP config (both placeholders, as _harness_setup.py does) ─
+sed -e "s|\${DESKTOP_REPO_PATH}|$DESKTOP_REPO_PATH|g" \
+  -e "s|\${LEDGER_DIR}|$RESULTS_DIR/ledger.db|g" "$MCP_CONFIG_TEMPLATE" > "$MCP_CONFIG_MATERIALIZED"
+
+# Reset port file so the chromium poll only sees this run's value.
+rm -f "$PORT_FILE"
+
+# ── Start Xvfb + minimal WM ─────────────────────────────────────────────
+Xvfb ":${DISPLAY_NUM}" -screen 0 "${RES}x24" -nolisten tcp >/tmp/xvfb.log 2>&1 &
+XVFB_PID=$!
+export DISPLAY=":${DISPLAY_NUM}"
+sleep 1
+
+fluxbox >/tmp/fluxbox.log 2>&1 &
+FLUXBOX_PID=$!
+sleep 1
+
+cleanup() {  # EXIT trap: tear down the window manager and X server
+  set +e  # best-effort: a failing kill/wait must not abort under `set -e`
+  kill "$FLUXBOX_PID" "$XVFB_PID" 2>/dev/null
+  wait 2>/dev/null  # no args: reap all remaining background children
+}
+trap cleanup EXIT
+
+# ── Recording paths ─────────────────────────────────────────────────────
+FULL_MP4="$OUT_DIR/full.mp4"
+TRANSCRIPT="$RESULTS_DIR/composite-demo-transcript.ndjson"
+SCENES_FILE="$RESULTS_DIR/composite-demo-scenes.txt"
+
+export DEMO_TRANSCRIPT="$TRANSCRIPT"
+export DEMO_SCENES_FILE="$SCENES_FILE"
+
+PROMPT_BODY="$(cat "$COMPOSITE_PROMPT_FILE")"
+
+# ── Start ffmpeg recording ──────────────────────────────────────────────
+T0=$(date +%s.%N)
+ffmpeg -y -f x11grab -video_size "$RES" -framerate "$FRAMERATE" \
+  -i ":${DISPLAY_NUM}" \
+  -c:v libx264 -preset ultrafast -pix_fmt yuv420p \
+  "$FULL_MP4" >/tmp/ffmpeg-record.log 2>&1 &
+FFMPEG_PID=$!
+sleep 1
+
+# ── Build claude command piped through the demo renderer ────────────────
+# stream-json gives us the tool-use timeline for scene detection;
+# demo_renderer.py pretty-prints it back to readable text in the xterm.
+# Bash is allowed for `git add`/`git commit` (per composite-demo.md);
+# Edit is allowed so claude can modify cherry-pick.ts live.
+CLAUDE_CMD=(
+  claude -p "$PROMPT_BODY"
+  --mcp-config "$MCP_CONFIG_MATERIALIZED"
+  --strict-mcp-config
+  --allowed-tools "mcp__bicameral,Read,Grep,Edit,Bash"
+  --add-dir "$DESKTOP_REPO_PATH"
+  --output-format stream-json
+  --verbose
+  --no-session-persistence
+  --max-budget-usd 5.0
+  --dangerously-skip-permissions
+)
+
+CLAUDE_LINE=""
+for arg in "${CLAUDE_CMD[@]}"; do
+  CLAUDE_LINE+=$(printf ' %q' "$arg")
+done
+
+# ── Launch xterm running claude → renderer ──────────────────────────────
+(
+  cd "$DESKTOP_REPO_PATH"  # so claude's Bash git commands run against the fixture repo
+  xterm -geometry 100x40+0+0 -fa Monospace -fs 11 \
+    -bg black -fg white -title "claude — composite demo (3 scenes)" \
+    -e bash -lc "${CLAUDE_LINE# } | python3 ${DEMO_RENDERER}; echo; echo '[demo] all scenes complete — recording wraps in 4s'; sleep 4" \
+    >/tmp/xterm-composite.log 2>&1 &
+  echo $! > /tmp/xterm-composite.pid
+)
+XTERM_PID=$(cat /tmp/xterm-composite.pid)
+
+# ── Poll for dashboard.port (up to 60s) and launch chromium ─────────────
+PORT=""
+for _ in $(seq 1 60); do
+  if [ -f "$PORT_FILE" ]; then
+    PORT="$(tr -d '[:space:]' < "$PORT_FILE" || true)"
+    [ -n "$PORT" ] && break
+  fi
+  sleep 1
+done
+
+CHROMIUM_PID=""
+if [ -n "$PORT" ]; then
+  "$CHROME_BIN" --no-sandbox --disable-gpu \
+    --window-size="${HALF_W},${RES_H}" \
+    --window-position="${HALF_W},0" \
+    --user-data-dir="/tmp/chromium-composite" \
+    --no-first-run --no-default-browser-check \
+    --new-window "http://localhost:${PORT}" \
+    >/tmp/chromium-composite.log 2>&1 &
+  CHROMIUM_PID=$!
+else
+  echo "  warning: dashboard port never appeared; recording xterm-only" >&2
+fi
+
+# ── Wait for claude to finish (cap 25 min) ──────────────────────────────
+COMPOSITE_TIMEOUT=1500
+WAITED=0
+while kill -0 "$XTERM_PID" 2>/dev/null; do
+  sleep 2
+  WAITED=$((WAITED + 2))
+  if [ "$WAITED" -ge "$COMPOSITE_TIMEOUT" ]; then
+    echo "  warning: composite demo exceeded ${COMPOSITE_TIMEOUT}s — killing xterm" >&2
+    kill "$XTERM_PID" 2>/dev/null || true
+    break
+  fi
+done
+
+# Brief pause so dashboard SSE settles into its final state on the right.
+sleep 4
+
+# ── Stop ffmpeg cleanly so the moov atom is flushed ─────────────────────
+kill -INT "$FFMPEG_PID" 2>/dev/null || true
+wait "$FFMPEG_PID" 2>/dev/null || true
+
+if [ -n "$CHROMIUM_PID" ]; then
+  kill "$CHROMIUM_PID" 2>/dev/null || true
+  wait "$CHROMIUM_PID" 2>/dev/null || true
+fi
+
+if [ ! -s "$FULL_MP4" ]; then
+  echo "ERROR: $FULL_MP4 missing or empty — nothing to split" >&2
+  exit 1
+fi
+
+echo "=== full.mp4 written ($(stat -c%s "$FULL_MP4" 2>/dev/null || stat -f%z "$FULL_MP4") bytes) ==="
+echo "=== Scene markers ==="
+cat "$SCENES_FILE" 2>/dev/null || echo "(no scenes file)"
+
+# ── Extract scene boundaries (epoch → seconds-from-T0) ──────────────────
+to_offset() {  # $1 = epoch seconds; prints max(0, $1 - $T0) with 3 decimals
+  python3 - "$T0" "$1" <<'PY'
+import sys
+t0 = float(sys.argv[1])
+t = float(sys.argv[2])
+print(f"{max(0.0, t - t0):.3f}")
+PY
+}
+
+SCENE_1_TO_2_EPOCH="$(grep '^scene_1_to_2=' "$SCENES_FILE" 2>/dev/null | tail -1 | cut -d= -f2 || true)"
+SCENE_2_TO_3_EPOCH="$(grep '^scene_2_to_3=' "$SCENES_FILE" 2>/dev/null | tail -1 | cut -d= -f2 || true)"
+
+# ── Fallback path: if scene markers are missing, keep full.mp4 as the
+# only artifact — pm/dev split is impossible without timestamps. ────────
+if [ -z "$SCENE_1_TO_2_EPOCH" ] || [ -z "$SCENE_2_TO_3_EPOCH" ]; then
+  echo "WARNING: scene boundary markers missing — emitting full.mp4 only" >&2
+  echo "  (pm.mp4 / dev.mp4 will not be generated)"
+  ls -la "$OUT_DIR"
+  exit 0
+fi
+
+T1="$(to_offset "$SCENE_1_TO_2_EPOCH")"
+T2="$(to_offset "$SCENE_2_TO_3_EPOCH")"
+echo "Scene boundaries (s from T0): t1=$T1  t2=$T2"
+
+# ── Trim full.mp4 into three pieces (re-encoded for frame-accurate cuts) ─
+PM_PRE="$RESULTS_DIR/pm-pre.mp4"
+DEV_OUT="$OUT_DIR/dev.mp4"
+PM_POST="$RESULTS_DIR/pm-post.mp4"
+
+# Common encoder flags so all pieces share codec/format for safe concat.
+ENC_FLAGS=(
+  -c:v libx264 -preset ultrafast -pix_fmt yuv420p
+  -r "$FRAMERATE"
+  -an
+)
+
+ffmpeg -y -i "$FULL_MP4" -ss 0 -to "$T1" "${ENC_FLAGS[@]}" "$PM_PRE" \
+  >>/tmp/ffmpeg-split.log 2>&1
+ffmpeg -y -i "$FULL_MP4" -ss "$T1" -to "$T2" "${ENC_FLAGS[@]}" "$DEV_OUT" \
+  >>/tmp/ffmpeg-split.log 2>&1
+ffmpeg -y -i "$FULL_MP4" -ss "$T2" "${ENC_FLAGS[@]}" "$PM_POST" \
+  >>/tmp/ffmpeg-split.log 2>&1
+
+# ── Generate transition slide between PM-pre and PM-post ────────────────
+TRANSITION="$RESULTS_DIR/transition.mp4"
+FONT_BOLD="/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
+FONT_REG="/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
+
+# Three lines centered on a deep navy background. Font sizes chosen for
+# 1920x1080 readability; colors match a darkmode-dashboard palette so
+# the transition feels of-a-piece with the rest of the demo.
+ffmpeg -y \
+  -f lavfi -i "color=c=#0a0e27:s=${RES_W}x${RES_H}:d=${TRANSITION_DURATION}:r=${FRAMERATE}" \
+  -vf "drawtext=fontfile='${FONT_BOLD}':text='— Pre-implementation complete —':fontsize=58:fontcolor=#8aa0c8:x=(w-text_w)/2:y=(h-text_h)/2-180,
+       drawtext=fontfile='${FONT_BOLD}':text='Dev now implements the change':fontsize=78:fontcolor=#ffffff:x=(w-text_w)/2:y=(h-text_h)/2-60,
+       drawtext=fontfile='${FONT_REG}':text='(see dev.mp4 — preflight, edit, commit, link_commit, resolve_compliance)':fontsize=30:fontcolor=#8aa0c8:x=(w-text_w)/2:y=(h-text_h)/2+40,
+       drawtext=fontfile='${FONT_BOLD}':text='Returning to PM after the implementation has landed':fontsize=46:fontcolor=#ffd76a:x=(w-text_w)/2:y=(h-text_h)/2+160" \
+  "${ENC_FLAGS[@]}" -t "$TRANSITION_DURATION" "$TRANSITION" \
+  >>/tmp/ffmpeg-transition.log 2>&1
+
+# ── Concat pm.mp4 = pm-pre + transition + pm-post ───────────────────────
+PM_CONCAT_LIST="$RESULTS_DIR/pm-concat.txt"
+{
+  echo "file '$PM_PRE'"
+  echo "file '$TRANSITION'"
+  echo "file '$PM_POST'"
+} > "$PM_CONCAT_LIST"
+
+PM_OUT="$OUT_DIR/pm.mp4"
+ffmpeg -y -f concat -safe 0 -i "$PM_CONCAT_LIST" \
+  "${ENC_FLAGS[@]}" "$PM_OUT" >>/tmp/ffmpeg-concat.log 2>&1
+
+# Clean up the scratch trims; keep full.mp4 + dev.mp4 + pm.mp4 in OUT_DIR.
+rm -f "$PM_PRE" "$PM_POST" "$TRANSITION" "$PM_CONCAT_LIST"
+
+echo "=== Demo recording + split complete ==="
+ls -la "$OUT_DIR"
diff --git a/tests/e2e/record_demo_interactive.sh b/tests/e2e/record_demo_interactive.sh
new file mode 100755
index 00000000..b6f95109
--- /dev/null
+++ b/tests/e2e/record_demo_interactive.sh
@@ -0,0 +1,637 @@
+#!/usr/bin/env bash
+# Interactive demo recording — tmux-driven real claude TUI, per-scene sessions.
+#
+# Implementation of thoughts/shared/plans/2026-05-01-interactive-recording-spec.md.
+# Replaces the headless `claude -p` + demo_renderer.py path with five real
+# interactive Claude Code sessions (one per flow), driven by `tmux send-keys` /
+# bracketed paste. State carries across scenes via the shared surrealkv ledger
+# (matching run_e2e_flows.py's persistence contract).
+#
+# Layout (1920x1080):
+#   ┌──────────────────────────┬──────────────────────────┐
+#   │  xterm                   │  chromium                │
+#   │  attached to tmux pane   │  http://localhost:<port> │
+#   │  running interactive     │  bicameral dashboard     │
+#   │  claude TUI              │  (live SSE updates)      │
+#   └──────────────────────────┴──────────────────────────┘
+#
+# Output (in $OUT_DIR):
+#   - full-int.mp4   — raw continuous capture of all 5 scenes (no transition)
+#   - scene-1.mp4 … scene-5.mp4 — per-scene splits
+#   - pm.mp4         — scene-1 + transition slide + scene-5
+#   - dev.mp4        — scene-2 + scene-3 + scene-4
+#
+# Legacy `record_demo.sh` is intentionally retained as a fallback path; the
+# workflow's `recording` job has `continue-on-error: true`, so a flake here
+# leaves the assertion artifacts intact.
+#
+# Prereqs (Linux runner): Xvfb, fluxbox, xterm, ffmpeg, tmux, claude CLI,
+# bicameral-mcp, python3, chromium-compatible browser, DejaVu fonts.
+
+set -euo pipefail
+
+# ── Config ──────────────────────────────────────────────────────────────
+DISPLAY_NUM=99
+RES_W=1920
+RES_H=1080
+HALF_W=$((RES_W / 2))
+RES="${RES_W}x${RES_H}"
+FRAMERATE=10
+TRANSITION_DURATION=4
+
+# Per-scene polling caps (see spec §6.1, §6.3, §6.4).
+READY_TIMEOUT=90        # claude TUI must show input box within this — longer
+                        # because fresh-runner state walks 5+ onboarding dialogs
+IDLE_MAX_WAIT=300       # 5 min cap per scene for agent finish
+IDLE_STABLE_FOR=8       # input box must persist for N consecutive samples
+SESSION_DEAD_GRACE=60   # post-/exit grace for SessionEnd hook to run
+PORT_POLL_TIMEOUT=45    # post-paste wait for dashboard.port to appear
+
+E2E_DIR="$(cd "$(dirname "$0")" && pwd)"
+MCP_DIR="$(cd "$E2E_DIR/../.." && pwd)"
+OUT_DIR="$MCP_DIR/docs/demos/v0-userflow-e2e"
+RESULTS_DIR="$MCP_DIR/test-results/e2e"
+LEDGER_DIR="$RESULTS_DIR/ledger.db"
+MCP_CONFIG_TEMPLATE="$E2E_DIR/bicameral.mcp.json"
+MCP_CONFIG_MATERIALIZED="$RESULTS_DIR/bicameral.mcp.materialized.json"
+PROMPTS_DIR="$E2E_DIR/prompts"
+PORT_FILE="$HOME/.bicameral/dashboard.port"
+
+DESKTOP_REPO_PATH="${DESKTOP_REPO_PATH:-/tmp/desktop-clone}"
+
+mkdir -p "$OUT_DIR" "$RESULTS_DIR" "$(dirname "$PORT_FILE")"
+
+if [ ! -d "$DESKTOP_REPO_PATH" ]; then
+  echo "ERROR: DESKTOP_REPO_PATH=$DESKTOP_REPO_PATH does not exist." >&2
+  exit 2
+fi
+
+for bin in Xvfb fluxbox xterm ffmpeg claude bicameral-mcp python3 tmux; do
+  if ! command -v "$bin" >/dev/null 2>&1; then
+    echo "ERROR: required binary '$bin' not found on PATH." >&2
+    exit 2
+  fi
+done
+
+CHROME_BIN="$(command -v google-chrome-stable \
+  || command -v google-chrome \
+  || command -v chromium \
+  || command -v chromium-browser \
+  || true)"
+if [ -z "$CHROME_BIN" ]; then
+  echo "ERROR: no chromium-compatible browser found on PATH." >&2
+  exit 2
+fi
+echo "[demo] using browser: $CHROME_BIN"
+
+# ── Auth: ANTHROPIC_API_KEY (NOT CLAUDE_CODE_OAUTH_TOKEN) ──────────────
+# Verified locally and matches GH issue #32463: interactive `claude` reads
+# but does NOT honour `CLAUDE_CODE_OAUTH_TOKEN`. It DOES honour
+# `ANTHROPIC_API_KEY`, but on first run it shows a "Detected a custom API
+# key in your environment / Do you want to use this API key?" picker that
+# we have to dismiss in `wait_for_claude_ready`. The assertions job keeps
+# using OAuth (its `claude -p` path honours that env var fine).
+if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
+  echo "[demo] WARNING: ANTHROPIC_API_KEY unset — interactive claude will hit the 'Select login method' picker with no way to advance" >&2
+fi
+
+# ── Setup substrate — single source of truth shared with run_e2e_flows.py.
+#    `_harness_setup.py` materializes the MCP config, writes claude-settings
+#    with all three hooks (PostToolUse / SessionEnd / UserPromptSubmit) wired
+#    via setup_wizard, bootstraps `.bicameral/` inside DESKTOP_REPO_PATH so
+#    the SessionEnd guard passes, wipes the ledger, and resets the desktop
+#    clone. The recording job and the assertion job both call this — no
+#    inline duplication, no drift between the two paths. ──────────────────
+SETTINGS_FILE="$RESULTS_DIR/claude-settings-with-hook.json"
+python3 "$E2E_DIR/_harness_setup.py" \
+  --desktop-repo-path "$DESKTOP_REPO_PATH" \
+  --results-dir "$RESULTS_DIR" \
+  --mcp-config-template "$MCP_CONFIG_TEMPLATE" \
+  --mcp-root "$MCP_DIR" \
+  >/dev/null
+rm -f "$PORT_FILE"
+
+# ── Start Xvfb + minimal WM ─────────────────────────────────────────────
+Xvfb ":${DISPLAY_NUM}" -screen 0 "${RES}x24" -nolisten tcp >/tmp/xvfb.log 2>&1 &
+XVFB_PID=$!
+export DISPLAY=":${DISPLAY_NUM}"
+sleep 1
+
+fluxbox >/tmp/fluxbox.log 2>&1 &
+FLUXBOX_PID=$!
+sleep 1
+
+CHROMIUM_PID=""
+CURRENT_PORT=""
+FFMPEG_PID=""
+XTERM_PIDS=()
+
+# cleanup — EXIT-trap handler that tears down every process this script
+# started. Reads the PID globals (FFMPEG_PID, CHROMIUM_PID, XTERM_PIDS,
+# FLUXBOX_PID, XVFB_PID); an empty FFMPEG_PID / CHROMIUM_PID is skipped.
+cleanup() {
+  # Best-effort from here on: a failing kill/wait must not abort the teardown.
+  set +e
+  if [ -n "$FFMPEG_PID" ]; then
+    # SIGINT (not the default SIGTERM), same signal the main path uses in its
+    # "Stop ffmpeg cleanly" step; then reap the process.
+    kill -INT "$FFMPEG_PID" 2>/dev/null
+    wait "$FFMPEG_PID" 2>/dev/null
+  fi
+  if [ -n "$CHROMIUM_PID" ]; then
+    kill "$CHROMIUM_PID" 2>/dev/null
+    wait "$CHROMIUM_PID" 2>/dev/null
+  fi
+  # Only sessions named scene-* are killed; other tmux sessions are left alone.
+  for s in $(tmux list-sessions -F '#S' 2>/dev/null | grep '^scene-' || true); do
+    tmux kill-session -t "$s" 2>/dev/null
+  done
+  for p in "${XTERM_PIDS[@]}"; do
+    kill "$p" 2>/dev/null
+  done
+  # Window manager and X server go last, after the clients above.
+  kill "$FLUXBOX_PID" "$XVFB_PID" 2>/dev/null
+  # Reap any remaining background children of this shell.
+  wait 2>/dev/null
+}
+trap cleanup EXIT
+
+# ── Recording paths ─────────────────────────────────────────────────────
+FULL_MP4="$OUT_DIR/full-int.mp4"
+SCENE_BOUNDS_FILE="$RESULTS_DIR/scene-bounds-int.txt"
+: > "$SCENE_BOUNDS_FILE"
+
+# ── Helpers ──────────────────────────────────────────────────────────────
+
+# now_offset — seconds elapsed since ffmpeg started (T0)
+now_offset() {
+  # Prints (current time - T0) in seconds with millisecond precision, clamped
+  # at 0.0. T0 is a global set just before ffmpeg is launched; the float
+  # subtraction is delegated to python3, fed via a quoted heredoc.
+  python3 - "$T0" "$(date +%s.%N)" <<'PY'
+import sys
+print(f"{max(0.0, float(sys.argv[2]) - float(sys.argv[1])):.3f}")
+PY
+}
+
+# wait_for_claude_ready <session>
+# Walks the first-run onboarding dialog stack on a fresh CI runner.
+# Verified locally against claude 2.1.126 with HOME=tmpdir, ANTHROPIC_API_KEY
+# set: dismissals reach the `^❯ ` input prompt at t≈7s.
+#
+# Sequence (each fires at most once per session):
+#   1. Theme picker  ("Choose the text style ... run /theme")
+#        — Enter (default option 2 = Dark mode is preselected)
+#   2. API key picker ("Detected a custom API key in your environment")
+#        — '1' (override the preselected "No (recommended)" with "Yes")
+#   3. Security notes ("Security notes: ... Press Enter to continue…")
+#        — Enter
+#   4. Trust folder  ("Quick safety check ... trust this folder")
+#        — Enter (default option 1 = Yes is preselected)
+#   5. New MCP server prompt ("New MCP server found in .mcp.json")
+#        — Enter (default option 1 = Use this and all future)
+#   6. Bypass-permissions warning ("Claude Code running in Bypass Permissions mode")
+#        — '2' (override the preselected "No, exit" with "Yes, I accept")
+#
+# Detection: search WHOLE pane (not `tail -3`) — claude renders dialogs at a
+# fixed row near the middle of a tall pane. The `^❯` anchor at column 0
+# matches only the actual input prompt, not the menu rows ` ❯ 2. ...` which
+# have a leading space.
+# Args:    $1 = tmux session name.
+# Returns: 0 once a line starting with `❯` is visible in the pane;
+#          1 if the session dies or READY_TIMEOUT (counted in slept seconds,
+#          not wall-clock) elapses first.
+wait_for_claude_ready() {
+  local session=$1
+  local i=0
+  # Tracks which onboarding dialogs have already been answered so that a
+  # dialog still on screen during the next poll is not answered twice.
+  declare -A dismissed=()
+  while [ $i -lt $READY_TIMEOUT ]; do
+    if ! tmux has-session -t "$session" 2>/dev/null; then
+      echo "  warning: $session died before TUI was ready" >&2
+      return 1
+    fi
+    local pane
+    pane="$(tmux capture-pane -t "$session" -p 2>/dev/null || true)"
+
+    # Ready
+    if printf '%s' "$pane" | grep -q '^❯'; then
+      return 0
+    fi
+
+    # Onboarding dialogs — each at most once per session
+    # Every branch sends one key, sleeps 2s to let the TUI redraw, charges
+    # those 2s against the timeout budget, and re-polls from the top.
+    if [ -z "${dismissed[theme]:-}" ] && \
+       printf '%s' "$pane" | grep -qE 'Choose the text style|run /theme'; then
+      tmux send-keys -t "$session" Enter
+      dismissed[theme]=1; sleep 2; i=$((i+2)); continue
+    fi
+    if [ -z "${dismissed[api_key]:-}" ] && \
+       printf '%s' "$pane" | grep -q 'Detected a custom API key'; then
+      tmux send-keys -t "$session" '1'
+      dismissed[api_key]=1; sleep 2; i=$((i+2)); continue
+    fi
+    if [ -z "${dismissed[security]:-}" ] && \
+       printf '%s' "$pane" | grep -q 'Security notes:'; then
+      tmux send-keys -t "$session" Enter
+      dismissed[security]=1; sleep 2; i=$((i+2)); continue
+    fi
+    if [ -z "${dismissed[trust]:-}" ] && \
+       printf '%s' "$pane" | grep -q 'trust this folder'; then
+      tmux send-keys -t "$session" Enter
+      dismissed[trust]=1; sleep 2; i=$((i+2)); continue
+    fi
+    if [ -z "${dismissed[mcp]:-}" ] && \
+       printf '%s' "$pane" | grep -q 'New MCP server found'; then
+      tmux send-keys -t "$session" Enter
+      dismissed[mcp]=1; sleep 2; i=$((i+2)); continue
+    fi
+    if [ -z "${dismissed[bypass]:-}" ] && \
+       printf '%s' "$pane" | grep -q 'Bypass Permissions mode'; then
+      tmux send-keys -t "$session" '2'
+      dismissed[bypass]=1; sleep 2; i=$((i+2)); continue
+    fi
+
+    # Nothing recognised on screen yet — poll again in a second.
+    sleep 1
+    i=$((i+1))
+  done
+  echo "  warning: claude TUI never showed input prompt for $session" >&2
+  return 1
+}
+
+# type_prompt <session> <body> [total_seconds]
+# Types body one character at a time so the recording shows a human-paced
+# typing animation. The per-key delay is total_seconds / len(body), floored
+# at 5 ms, so total_seconds (default 3) is a target, not a guarantee: long
+# prompts take longer. Embedded newlines are inserted via M-Enter
+# (Alt+Return) — the only escape that preserves newlines in claude TUI's
+# input box without submitting (verified locally). Final Enter submits.
+#
+# A ';' is sent as '\;': tmux treats an argument that ends in an unescaped
+# semicolon as a command separator (even with -l), so a bare ';' would be
+# swallowed and silently missing from the typed prompt.
+#
+# Returns immediately (status 0) when body is empty.
+type_prompt() {
+  local session=$1
+  local body=$2
+  local total_secs=${3:-3}
+  local len=${#body}
+  if [ "$len" -le 0 ]; then return; fi
+  local delay
+  delay=$(python3 -c "print(round(max(0.005, ${total_secs} / ${len}), 4))")
+  local i ch
+  for ((i=0; i<len; i++)); do
+    ch="${body:$i:1}"
+    case "$ch" in
+      $'\n')
+        tmux send-keys -t "$session" M-Enter
+        ;;
+      ';')
+        # Escaped so tmux's command parser passes a literal semicolon through.
+        tmux send-keys -t "$session" -l '\;'
+        ;;
+      *)
+        tmux send-keys -t "$session" -l "$ch"
+        ;;
+    esac
+    sleep "$delay"
+  done
+  # Short beat so the last character is rendered before the prompt is submitted.
+  sleep 0.3
+  tmux send-keys -t "$session" Enter
+}
+
+# wait_for_agent_idle <session>
+# Claude TUI keeps the `❯ ` input prompt rendered at a fixed row even while
+# streaming, so the prompt-visible test is necessary but not sufficient. The
+# real signal that the agent stopped is pane stability — when the streaming
+# output stops mutating for IDLE_STABLE_FOR consecutive samples, we're idle.
+# Args:    $1 = tmux session name.
+# Returns: 0 once the pane has been unchanged AND shows a `❯` line for
+#          IDLE_STABLE_FOR consecutive 1-second samples; 1 if the session
+#          dies or IDLE_MAX_WAIT samples pass without reaching that state.
+wait_for_agent_idle() {
+  local session=$1
+  local stable_count=0
+  local i=0
+  local prev=""
+  while [ $i -lt $IDLE_MAX_WAIT ]; do
+    if ! tmux has-session -t "$session" 2>/dev/null; then
+      echo "  warning: $session died mid-response" >&2
+      return 1
+    fi
+    local pane
+    pane="$(tmux capture-pane -t "$session" -p 2>/dev/null || true)"
+    # A sample counts only if it is identical to the previous one and the
+    # input prompt is visible; any change resets the streak to zero.
+    if [ "$pane" = "$prev" ] && printf '%s' "$pane" | grep -q '^❯'; then
+      stable_count=$((stable_count+1))
+      if [ $stable_count -ge $IDLE_STABLE_FOR ]; then
+        return 0
+      fi
+    else
+      stable_count=0
+    fi
+    prev=$pane
+    sleep 1
+    i=$((i+1))
+  done
+  echo "  warning: agent_idle timed out after ${IDLE_MAX_WAIT}s for $session" >&2
+  return 1
+}
+
+# wait_for_session_dead <session>
+# After /exit, claude runs the SessionEnd hook (capture-corrections may fire)
+# before the process actually exits. Wait for natural death; force-kill only
+# after the grace period to avoid polluting the ledger mid-hook.
+wait_for_session_dead() {
+  local session=$1
+  local i=0
+  while tmux has-session -t "$session" 2>/dev/null; do
+    sleep 1
+    i=$((i+1))
+    if [ $i -ge $SESSION_DEAD_GRACE ]; then
+      echo "  warning: $session didn't exit after ${SESSION_DEAD_GRACE}s — force-killing" >&2
+      tmux kill-session -t "$session" 2>/dev/null
+      break
+    fi
+  done
+}
+
+# poll_port_file — wait up to PORT_POLL_TIMEOUT for the dashboard sidecar to
+# write its bound port. Returns the port on stdout (empty on timeout).
+poll_port_file() {
+  local i=0
+  while [ $i -lt $PORT_POLL_TIMEOUT ]; do
+    if [ -f "$PORT_FILE" ]; then
+      local p
+      p="$(tr -d '[:space:]' < "$PORT_FILE" || true)"
+      if [ -n "$p" ]; then
+        printf '%s' "$p"
+        return 0
+      fi
+    fi
+    sleep 1
+    i=$((i+1))
+  done
+  return 1
+}
+
+# refresh_chromium_for_port <port>
+# Each scene = new MCP process = new port. Kill the previous chromium and
+# relaunch on the new port (spec §6.5 option A). The brief flicker visually
+# emphasises the scene boundary; option B (standalone dashboard sidecar) is
+# a deferred follow-up.
+# Args:    $1 = port the dashboard is listening on.
+# Globals: reads/writes CHROMIUM_PID and CURRENT_PORT; reads CHROME_BIN,
+#          HALF_W, RES_H.
+refresh_chromium_for_port() {
+  local new_port=$1
+  # Nothing to do when the port is unchanged and the tracked chromium process
+  # is still alive (kill -0 only probes, it sends no signal).
+  if [ "$new_port" = "$CURRENT_PORT" ] && [ -n "$CHROMIUM_PID" ] && kill -0 "$CHROMIUM_PID" 2>/dev/null; then
+    return 0
+  fi
+  if [ -n "$CHROMIUM_PID" ]; then
+    kill "$CHROMIUM_PID" 2>/dev/null || true
+    wait "$CHROMIUM_PID" 2>/dev/null || true
+  fi
+  # Window fills the right half of the screen; each port gets its own
+  # --user-data-dir under /tmp.
+  "$CHROME_BIN" --no-sandbox --disable-gpu \
+    --window-size="${HALF_W},${RES_H}" \
+    --window-position="${HALF_W},0" \
+    --user-data-dir="/tmp/chromium-int-${new_port}" \
+    --no-first-run --no-default-browser-check \
+    --new-window "http://localhost:${new_port}" \
+    >>/tmp/chromium-int.log 2>&1 &
+  CHROMIUM_PID=$!
+  CURRENT_PORT=$new_port
+}
+
+# ── Start ffmpeg (continuous capture) ────────────────────────────────────
+T0=$(date +%s.%N)
+ffmpeg -y -f x11grab -video_size "$RES" -framerate "$FRAMERATE" \
+  -i ":${DISPLAY_NUM}" \
+  -c:v libx264 -preset ultrafast -pix_fmt yuv420p \
+  "$FULL_MP4" >/tmp/ffmpeg-int.log 2>&1 &
+FFMPEG_PID=$!
+sleep 1
+
+# ── Per-scene loop ──────────────────────────────────────────────────────
+# One tmux+claude session per flow, mirroring run_e2e_flows.py exactly. State
+# persists via the shared surrealkv ledger; what differs from headless is the
+# real TUI rendering and the human-paced typed input.
+SCENES=(
+  "1:flow-1-ingest.md"
+  "2:flow-2-preflight.md"
+  "3:flow-3-commit-sync.md"
+  "4:flow-4-session-end.md"
+  "5:flow-5-history.md"
+)
+
+# Dashboard preamble — kept out of the flow prompt files so the assertion
+# harness (which doesn't record) can reuse them as-is. Each scene's MCP
+# process has its own port; this preamble triggers the dashboard tool so
+# the port file is written and we can point chromium at it.
+DASHBOARD_PREAMBLE='Before doing anything else, call bicameral.dashboard so a live dashboard sidecar is bound to this MCP process. Then continue with the request below.
+
+'
+
+run_scene() {
+  local N=$1
+  local FILE=$2
+  local SESSION="scene-${N}"
+  local PROMPT_FILE="$PROMPTS_DIR/$FILE"
+  local CLAUDE_LOG="$RESULTS_DIR/claude-scene-${N}.stderr"
+  local CLAUDE_EXIT="$RESULTS_DIR/claude-scene-${N}.exit"
+  local PANE_DUMP="$RESULTS_DIR/scene-${N}-pane.txt"
+  local RUNNER="$RESULTS_DIR/claude-scene-${N}.sh"
+  echo "=== Scene ${N} (${FILE}) ==="
+
+  # New MCP process per scene → port may change. Wipe stale port file so the
+  # poll below only sees this scene's value.
+  rm -f "$PORT_FILE" "$CLAUDE_LOG" "$CLAUDE_EXIT"
+
+  echo "scene_${N}_start=$(now_offset)" >> "$SCENE_BOUNDS_FILE"
+
+  # Per-scene runner: redirects claude's stderr to a log and writes its exit
+  # code to a sibling file, so a startup failure (bad flag, missing OAuth,
+  # MCP crash) leaves actionable diagnostics instead of a silent dead pane.
+  # `--no-session-persistence` and `--max-budget-usd` are intentionally NOT
+  # passed — both are documented as `--print`-only and cause an immediate
+  # exit-1 in interactive mode (verified locally against claude 2.1.x).
+  cat > "$RUNNER" <<EOF
+#!/usr/bin/env bash
+cd "$DESKTOP_REPO_PATH"
+exec 2>"$CLAUDE_LOG"
+claude \\
+    --mcp-config "$MCP_CONFIG_MATERIALIZED" \\
+    --strict-mcp-config \\
+    --settings "$SETTINGS_FILE" \\
+    --allowed-tools mcp__bicameral,Read,Grep,Edit,Bash \\
+    --add-dir "$DESKTOP_REPO_PATH" \\
+    --dangerously-skip-permissions
+echo "exit=\$?" > "$CLAUDE_EXIT"
+EOF
+  chmod +x "$RUNNER"
+
+  tmux new-session -d -s "$SESSION" -x 110 -y 40 "$RUNNER" || {
+    echo "  ERROR: tmux new-session failed for $SESSION" >&2
+    echo "scene_${N}_end=$(now_offset)" >> "$SCENE_BOUNDS_FILE"
+    return 1
+  }
+
+  xterm -geometry 100x40+0+0 -fa Monospace -fs 11 \
+    -bg black -fg white -title "claude — scene ${N}: ${FILE}" \
+    -e bash -lc "tmux attach -t $SESSION; sleep 2" \
+    >/tmp/xterm-scene-${N}.log 2>&1 &
+  XTERM_PIDS+=($!)
+
+  if ! wait_for_claude_ready "$SESSION"; then
+    {
+      echo "--- last pane capture ---"
+      tmux capture-pane -t "$SESSION" -p 2>/dev/null || echo "(no pane — session dead)"
+      echo "--- claude stderr ---"
+      cat "$CLAUDE_LOG" 2>/dev/null || echo "(no stderr log)"
+      echo "--- claude exit ---"
+      cat "$CLAUDE_EXIT" 2>/dev/null || echo "(no exit file — process may still be alive)"
+    } > "$PANE_DUMP"
+    echo "  ERROR: scene ${N} did not reach ready state — diagnostics in $PANE_DUMP" >&2
+    tmux kill-session -t "$SESSION" 2>/dev/null || true
+    echo "scene_${N}_end=$(now_offset)" >> "$SCENE_BOUNDS_FILE"
+    return 1
+  fi
+
+  PROMPT_BODY="${DASHBOARD_PREAMBLE}$(cat "$PROMPT_FILE")"
+  type_prompt "$SESSION" "$PROMPT_BODY" 3
+
+  if PORT="$(poll_port_file)"; then
+    refresh_chromium_for_port "$PORT"
+  else
+    echo "  warning: scene ${N} dashboard.port never appeared — right pane may be stale" >&2
+  fi
+
+  wait_for_agent_idle "$SESSION" || true
+
+  # Pause so the dashboard SSE settles into its final state for this scene
+  # (also masks the chromium reload flicker on the next scene behind a still
+  # frame of the closing state).
+  sleep 3
+
+  # Trigger SessionEnd hook (capture-corrections may auto-fire here), then
+  # wait for the tmux session to die naturally.
+  tmux send-keys -t "$SESSION" '/exit' Enter
+  wait_for_session_dead "$SESSION"
+
+  tmux capture-pane -t "$SESSION" -p -S - 2>/dev/null > "$PANE_DUMP" || true
+
+  echo "scene_${N}_end=$(now_offset)" >> "$SCENE_BOUNDS_FILE"
+  return 0
+}
+
+# `set +e` around each scene so a single failure doesn't abort the whole run —
+# we still want the partial recording + diagnostics for the scenes that did
+# work. Failed scenes still emit start/end bounds (zero-length window) so the
+# downstream split logic walks them as empty cuts.
+for entry in "${SCENES[@]}"; do
+  N="${entry%%:*}"
+  FILE="${entry#*:}"
+  set +e
+  run_scene "$N" "$FILE"
+  rc=$?
+  set -e
+  if [ $rc -ne 0 ]; then
+    echo "  (scene ${N} failed; continuing to next)" >&2
+  fi
+done
+
+# Tail pause so ffmpeg captures a clean closing frame after scene 5.
+sleep 3
+
+# ── Stop ffmpeg cleanly ──────────────────────────────────────────────────
+kill -INT "$FFMPEG_PID" 2>/dev/null || true
+wait "$FFMPEG_PID" 2>/dev/null || true
+FFMPEG_PID=""
+
+if [ -n "$CHROMIUM_PID" ]; then
+  kill "$CHROMIUM_PID" 2>/dev/null || true
+  wait "$CHROMIUM_PID" 2>/dev/null || true
+  CHROMIUM_PID=""
+fi
+
+if [ ! -s "$FULL_MP4" ]; then
+  echo "ERROR: $FULL_MP4 missing or empty — nothing to split" >&2
+  exit 1
+fi
+
+echo "=== full-int.mp4 written ($(stat -c%s "$FULL_MP4" 2>/dev/null || stat -f%z "$FULL_MP4") bytes) ==="
+echo "=== Scene boundaries (offsets from T0) ==="
+cat "$SCENE_BOUNDS_FILE"
+
+# ── Read boundary timestamps ─────────────────────────────────────────────
+# get_bound <key> — print the value of the last "<key>=<value>" line in
+# SCENE_BOUNDS_FILE, or nothing when the key is absent. The trailing
+# `|| true` matters: under `set -euo pipefail` a non-matching grep would make
+# the `T_x="$(get_bound …)"` assignments fail and abort the script before the
+# "boundary missing" fallback below can run.
+get_bound() { grep "^${1}=" "$SCENE_BOUNDS_FILE" | tail -1 | cut -d= -f2 || true; }
+
+T_S1="$(get_bound scene_1_start)"
+T_E1="$(get_bound scene_1_end)"
+T_S2="$(get_bound scene_2_start)"
+T_E2="$(get_bound scene_2_end)"
+T_S3="$(get_bound scene_3_start)"
+T_E3="$(get_bound scene_3_end)"
+T_S4="$(get_bound scene_4_start)"
+T_E4="$(get_bound scene_4_end)"
+T_S5="$(get_bound scene_5_start)"
+T_E5="$(get_bound scene_5_end)"
+
+# Fallback path: if any boundary is missing, keep full-int.mp4 only — the
+# split is meaningless without a complete set of timestamps.
+for v in "$T_S1" "$T_E1" "$T_S2" "$T_E2" "$T_S3" "$T_E3" "$T_S4" "$T_E4" "$T_S5" "$T_E5"; do
+  if [ -z "$v" ]; then
+    echo "WARNING: scene boundary missing — emitting full-int.mp4 only" >&2
+    ls -la "$OUT_DIR"
+    exit 0
+  fi
+done
+
+# ── Trim into per-scene mp4s (re-encoded for safe concat) ───────────────
+ENC_FLAGS=(
+  -c:v libx264 -preset ultrafast -pix_fmt yuv420p
+  -r "$FRAMERATE"
+  -an
+)
+
+# Failed scenes produce a zero-length (or near-zero) window. Skip them so we
+# don't emit empty mp4s that break the downstream concat.
+cut_scene() {
+  # cut_scene <from> <to> <dst> — re-encode the [from, to] window of FULL_MP4
+  # into dst. Windows shorter than 0.5s are skipped; dst is removed whenever
+  # no usable clip was produced.
+  local start_ts=$1 end_ts=$2 out_file=$3
+  local duration
+  duration="$(python3 -c "print(max(0.0, float('$end_ts') - float('$start_ts')))")"
+  if ! python3 -c "import sys; sys.exit(0 if float('$duration') >= 0.5 else 1)"; then
+    echo "  skip: $(basename "$out_file") window=${duration}s (scene likely failed)" >&2
+    rm -f "$out_file"
+    return
+  fi
+  ffmpeg -y -i "$FULL_MP4" -ss "$start_ts" -to "$end_ts" "${ENC_FLAGS[@]}" "$out_file" \
+    >>/tmp/ffmpeg-int-split.log 2>&1 || rm -f "$out_file"
+}
+
+S1="$OUT_DIR/scene-1.mp4"
+S2="$OUT_DIR/scene-2.mp4"
+S3="$OUT_DIR/scene-3.mp4"
+S4="$OUT_DIR/scene-4.mp4"
+S5="$OUT_DIR/scene-5.mp4"
+
+cut_scene "$T_S1" "$T_E1" "$S1"
+cut_scene "$T_S2" "$T_E2" "$S2"
+cut_scene "$T_S3" "$T_E3" "$S3"
+cut_scene "$T_S4" "$T_E4" "$S4"
+cut_scene "$T_S5" "$T_E5" "$S5"
+
+# ── Generate transition slide (matches legacy aesthetic) ─────────────────
+TRANSITION="$RESULTS_DIR/transition-int.mp4"
+FONT_BOLD="/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
+FONT_REG="/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
+
+ffmpeg -y \
+  -f lavfi -i "color=c=#0a0e27:s=${RES_W}x${RES_H}:d=${TRANSITION_DURATION}:r=${FRAMERATE}" \
+  -vf "drawtext=fontfile='${FONT_BOLD}':text='— Pre-implementation complete —':fontsize=58:fontcolor=#8aa0c8:x=(w-text_w)/2:y=(h-text_h)/2-180,
+       drawtext=fontfile='${FONT_BOLD}':text='Dev now implements the change':fontsize=78:fontcolor=#ffffff:x=(w-text_w)/2:y=(h-text_h)/2-60,
+       drawtext=fontfile='${FONT_REG}':text='(see dev.mp4 — preflight, commit-sync, session-end capture)':fontsize=30:fontcolor=#8aa0c8:x=(w-text_w)/2:y=(h-text_h)/2+40,
+       drawtext=fontfile='${FONT_BOLD}':text='Returning to PM after the implementation has landed':fontsize=46:fontcolor=#ffd76a:x=(w-text_w)/2:y=(h-text_h)/2+160" \
+  "${ENC_FLAGS[@]}" -t "$TRANSITION_DURATION" "$TRANSITION" \
+  >>/tmp/ffmpeg-int-transition.log 2>&1
+
+# pm/dev concat — only include scene mp4s that actually exist (a failed scene
+# leaves no file behind; see cut_scene). Skip the concat entirely if every
+# input is missing.
+write_concat_list() {
+  local list=$1
+  shift
+  : > "$list"
+  for f in "$@"; do
+    if [ -s "$f" ]; then
+      echo "file '$f'" >> "$list"
+    fi
+  done
+}
+
+# run_concat <list-file> <out-mp4>
+# Concatenates the clips named in list-file into out-mp4, re-encoding with
+# ENC_FLAGS. An empty or missing list is a warning, not an error (returns 0);
+# otherwise the return status is ffmpeg's.
+run_concat() {
+  local list=$1 out=$2
+  if [ ! -s "$list" ]; then
+    echo "  warning: $(basename "$out") concat list empty — skipping" >&2
+    return 0
+  fi
+  # -safe 0 lets the concat demuxer accept the absolute paths in the list.
+  ffmpeg -y -f concat -safe 0 -i "$list" "${ENC_FLAGS[@]}" "$out" \
+    >>/tmp/ffmpeg-int-concat.log 2>&1
+}
+
+PM_OUT="$OUT_DIR/pm.mp4"
+PM_LIST="$RESULTS_DIR/pm-int-concat.txt"
+write_concat_list "$PM_LIST" "$S1" "$TRANSITION" "$S5"
+run_concat "$PM_LIST" "$PM_OUT"
+
+DEV_OUT="$OUT_DIR/dev.mp4"
+DEV_LIST="$RESULTS_DIR/dev-int-concat.txt"
+write_concat_list "$DEV_LIST" "$S2" "$S3" "$S4"
+run_concat "$DEV_LIST" "$DEV_OUT"
+
+# Clean up scratch files; keep per-scene mp4s + pm.mp4 + dev.mp4 + full-int.mp4.
+rm -f "$PM_LIST" "$DEV_LIST" "$TRANSITION"
+
+echo "=== Interactive recording + split complete ==="
+ls -la "$OUT_DIR"
diff --git a/tests/e2e/run_e2e_flows.py b/tests/e2e/run_e2e_flows.py
new file mode 100644
index 00000000..c7858abd
--- /dev/null
+++ b/tests/e2e/run_e2e_flows.py
@@ -0,0 +1,1528 @@
+"""
+v0 user flow e2e — Claude Code CLI session orchestrator.
+
+Drives a real Claude Code CLI session per flow (5 sessions total), with
+bicameral-mcp registered as the only MCP server, and asserts on the
+stream-json transcript that the right MCP tools were called with the
+right shapes.
+
+Each flow:
+  1. Reads ``prompts/flow-N-*.md`` (natural-language user prompt)
+  2. Invokes ``claude -p <prompt> --mcp-config bicameral.mcp.json
+       --strict-mcp-config --output-format stream-json --add-dir <desktop_clone>``
+  3. Streams stdout to ``test-results/e2e/flow-N.ndjson``
+  4. Walks the transcript for tool_use blocks under ``mcp__bicameral__*``
+  5. Asserts per-flow invariants and prints PASS/FAIL
+
+The point: this exercises the full skill + MCP layer the way a user
+experiences it. The handler-replay sim at ``scripts/sim_issue_108_flows.py``
+remains useful for fast dev iteration on handler logic.
+
+Required env:
+  CLAUDE_CODE_OAUTH_TOKEN  Claude Code CLI auth (set by GitHub Actions
+                           ``production`` environment in CI).
+  DESKTOP_REPO_PATH        Path to a local clone of github.com/desktop/desktop.
+
+CI: see .github/workflows/v0-user-flow-e2e.yml.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import pathlib
+import shutil
+import subprocess
+import sys
+from collections.abc import Callable
+from dataclasses import dataclass, field
+
+E2E_ROOT = pathlib.Path(__file__).resolve().parent
+PROMPTS_DIR = E2E_ROOT / "prompts"
+MCP_CONFIG_TEMPLATE = E2E_ROOT / "bicameral.mcp.json"
+RESULTS_DIR = pathlib.Path(__file__).resolve().parents[2] / "test-results" / "e2e"
+RESULTS_DIR.mkdir(parents=True, exist_ok=True)
+
+# Wall-clock cap for a single `claude -p` flow invocation. Was 300s; CI
+# repeatedly tripped that limit on Flow 2 (the longest flow — chained
+# preflight → ingest(agent_session) → resolve_collision sequence after the
+# #154 hook landed). Last clean dev-branch Flow 2 measured 289.7s — only
+# ~3% headroom on the old cap. Bumped to 600s to give the post-hook
+# sequence plenty of margin without inflating the recording job's wall
+# beyond what GitHub Actions tolerates.
+CLAUDE_SESSION_TIMEOUT_S = 600
+
+# Persistent ledger shared across the 5 flow sessions in a single run, wiped
+# at the start of each run so flow-1 seeds → flow-2 refines → flow-3 reflects
+# → flow-4 captures → flow-5 ratifies, all against the same ledger state.
+LEDGER_DIR = RESULTS_DIR / "ledger.db"
+
+DESKTOP_REPO_PATH = os.environ.get("DESKTOP_REPO_PATH", "").strip()
+if not DESKTOP_REPO_PATH:
+    sys.stderr.write(
+        "ERROR: DESKTOP_REPO_PATH env var not set.\n"
+        "CI sets this automatically; locally:\n"
+        "  git clone --depth=1 https://github.com/desktop/desktop /tmp/desktop-clone\n"
+        "  DESKTOP_REPO_PATH=/tmp/desktop-clone python tests/e2e/run_e2e_flows.py\n"
+    )
+    sys.exit(2)
+
+if not shutil.which("claude"):
+    sys.stderr.write(
+        "ERROR: 'claude' CLI not found on PATH.\n"
+        "Install via: npm install -g @anthropic-ai/claude-code\n"
+    )
+    sys.exit(2)
+
+if not shutil.which("bicameral-mcp"):
+    sys.stderr.write(
+        "ERROR: 'bicameral-mcp' command not found on PATH.\nInstall via: pip install -e .\n"
+    )
+    sys.exit(2)
+
+
+# Setup helpers live in _harness_setup.py — single source of truth shared with
+# tests/e2e/record_demo_interactive.sh so the recording job and the assertion
+# job materialize byte-identical hook substrate. See _harness_setup.py docstring.
+sys.path.insert(0, str(E2E_ROOT))
+# fmt: off
+# isort: off
+from _harness_setup import (  # noqa: E402,I001  # path tweak above
+    bootstrap_bicameral_dir as _bootstrap_helper,
+    clean_ledger as _clean_ledger_helper,
+    materialize_mcp_config,
+    materialize_settings_with_hooks,
+    reset_desktop_repo as _reset_desktop_helper,
+)
+# fmt: on
+# isort: on
+
+_MCP_ROOT = pathlib.Path(__file__).resolve().parents[2]
+
+
+def _clean_ledger() -> None:
+    """Call the shared ``clean_ledger`` helper on this harness's ``LEDGER_DIR``."""
+    _clean_ledger_helper(LEDGER_DIR)
+
+
+def _reset_desktop_repo() -> None:
+    """Call the shared ``reset_desktop_repo`` helper on ``DESKTOP_REPO_PATH``."""
+    _reset_desktop_helper(DESKTOP_REPO_PATH)
+
+
+def _bootstrap_bicameral_dir() -> None:
+    """Call the shared ``bootstrap_bicameral_dir`` helper with
+    ``DESKTOP_REPO_PATH`` and the MCP repo root (``_MCP_ROOT``).
+    """
+    _bootstrap_helper(DESKTOP_REPO_PATH, _MCP_ROOT)
+
+
+MCP_CONFIG_PATH = materialize_mcp_config(
+    template=MCP_CONFIG_TEMPLATE,
+    out_dir=RESULTS_DIR,
+    desktop_repo_path=DESKTOP_REPO_PATH,
+    ledger_dir=LEDGER_DIR,
+)
+SETTINGS_PATH = materialize_settings_with_hooks(
+    out_dir=RESULTS_DIR,
+    mcp_config_path=MCP_CONFIG_PATH,
+    mcp_root=_MCP_ROOT,
+)
+
+
+@dataclass
+class FlowSpec:
+    """Each flow declares its layer so failures can be triaged honestly.
+
+    - ``mcp_layer`` flows use prompts that explicitly invoke MCP tools (ingest,
+      link_commit, ratify, etc.). They validate that the tool surface works.
+      Failure here = real broken tool.
+    - ``agentic_layer`` flows use natural-developer-voice prompts and rely on
+      bicameral skills to AUTO-FIRE on intent (e.g. preflight on "refactor X",
+      capture-corrections at session end). Failure here is an advisory regression
+      signal: skills aren't reliably triggering in headless ``claude -p`` mode.
+      The interactive recording path (tmux-driven real TUI) is the primary
+      validator for this layer; this harness tracks the gap.
+    """
+
+    # Identifier used in result lines and referenced by ``reuses_flow``.
+    flow_id: str
+    # Prompt file name; presumably resolved against PROMPTS_DIR — confirm at the call site.
+    prompt_file: str
+    # Grades a flow: takes the captured tool calls, returns (passed, message).
+    asserter: Callable[[list[dict]], tuple[bool, str]]
+    category: str  # "mcp_layer" | "agentic_layer"
+    advisory: str = ""  # rendered when the flow FAILs to explain what it means
+    skip: bool = False  # if True, do not invoke claude — mark SKIP and render advisory
+    # Flows sharing a session_group run inside one continuous claude session
+    # (chained via --session-id + --resume) so that multi-turn skills like
+    # bicameral-capture-corrections have real transcript history to scan and
+    # the SessionEnd hook fires once per group at the final flow's exit.
+    # None = standalone session (default; also disables session persistence).
+    session_group: str | None = None
+    # If set, do NOT invoke claude — reuse the tool_calls captured by the
+    # named earlier flow and run this asserter against them. Lets two flows
+    # grade independent properties of the same claude session (e.g. Flow 2
+    # = auto-fire scope, Flow 2a = full correction-capture loop) without
+    # paying for a duplicate API call.
+    reuses_flow: str | None = None
+
+
+@dataclass
+class FlowResult:
+    """Outcome of one flow, as recorded in ``RESULTS`` via ``section()``."""
+
+    flow_id: str
+    prompt_file: str
+    verdict: str  # "PASS" | "FAIL" | "ERROR" | "SKIP"
+    # Free-form result text; its first line is what ``section()`` prints.
+    body: str
+    category: str = "mcp_layer"
+    advisory: str = ""
+    # default_factory gives every result its own list instead of a shared one.
+    tool_calls: list[dict] = field(default_factory=list)
+    transcript_path: str = ""
+
+
+RESULTS: list[FlowResult] = []
+
+
+def section(result: FlowResult) -> None:
+    RESULTS.append(result)
+    line = result.body.splitlines()[0] if result.body else ""
+    print(f"[{result.flow_id}] {result.verdict} — {line[:100]}")
+
+
+# ── Post-hoc ledger validation ─────────────────────────────────────────
+
+
+def _snapshot_ledger() -> dict:
+    """Snapshot ledger state for before/after comparison: decision counts by
+    status plus compliance_check rows, capped by the query LIMITs below (200 /
+    500). Uses raw client to bypass the schema-migration crash documented in
+    iteration 1. Side effect: sets ``os.environ["SURREAL_URL"]`` to the ledger URL.
+    Returns ``{"total_decisions": N, "by_status": {status: N}, "compliance_checks": N,
+    "compliance_rows": [...], "decisions": [...]}``; on any error, ``{"error": repr(exc)}`` — caller decides how to handle.
+    """
+    import asyncio
+    import os
+
+    os.environ["SURREAL_URL"] = f"surrealkv://{LEDGER_DIR}"
+    try:
+        from ledger.client import LedgerClient  # noqa: E402
+
+        async def _q() -> dict:
+            client = LedgerClient(url=f"surrealkv://{LEDGER_DIR}")
+            await client.connect()
+            try:
+                drows = (
+                    await client.query(
+                        "SELECT decision_id, description, status FROM decision LIMIT 200"
+                    )
+                ) or []
+                ccrows = (
+                    await client.query(
+                        "SELECT decision_id, region_id, content_hash, verdict "
+                        "FROM compliance_check LIMIT 500"
+                    )
+                ) or []
+                buckets: dict[str, int] = {}
+                for r in drows:  # rows with a missing/empty status are bucketed as "unknown"
+                    buckets[(r.get("status") or "unknown")] = (
+                        buckets.get(r.get("status") or "unknown", 0) + 1
+                    )
+                return {
+                    "total_decisions": len(drows),
+                    "by_status": buckets,
+                    "compliance_checks": len(ccrows),
+                    "compliance_rows": ccrows,
+                    "decisions": drows,
+                }
+            finally:
+                await client.close()
+
+        return asyncio.run(_q())
+    except Exception as exc:
+        return {"error": repr(exc)}
+
+
+def _count_agent_session_decisions(snapshot: dict) -> int | None:
+    """Thin wrapper over ``_ledger_helpers.count_agent_session_decisions``. The
+    pure helper is kept in its own module so unit tests can import it without
+    tripping this harness's top-level env-var / CLI-presence guards.
+    """
+    from _ledger_helpers import count_agent_session_decisions as _count
+
+    return _count(snapshot)
+
+
+def _validate_flow4_via_ledger() -> None:
+    """Path-X-(b) validation per #147: open the ledger after the harness
+    completes and check for decisions written with source_type='agent_session'.
+
+    The SessionEnd hook spawns a separate ``claude -p`` subprocess whose
+    tool calls are NOT visible in the parent stream-json; the subprocess
+    writes to the ledger with source_type='agent_session', so its effect
+    IS observable post-hoc. This function merges that signal into Flow 4's
+    FlowResult, in-place. It is a no-op when RESULTS has no "Flow 4" entry.
+
+    Behavior matrix (a note is appended to ``body`` in every case):
+    - Count unavailable (helper returned ``None``): INCONCLUSIVE note; verdict unchanged.
+    - Ledger has agent_session rows: confirmation note; a FAIL verdict is
+      UPGRADED to PASS (and the note says so); PASS / ERROR / SKIP are kept.
+    - Ledger has zero agent_session rows: "path-X-b absent" note; verdict unchanged.
+    """
+    flow4 = next((r for r in RESULTS if r.flow_id == "Flow 4"), None)
+    if flow4 is None:
+        return
+
+    print("\n=== Flow 4 — querying ledger state for path-X-(b) signal ===")
+    after = _snapshot_ledger()
+    count = _count_agent_session_decisions(after)
+
+    if count is None:
+        flow4.body += (
+            f"\n— Ledger validation —\nINCONCLUSIVE: ledger query failed: {after.get('error')}\n"
+        )
+        return
+
+    if count > 0:
+        upgrade_note = ""
+        if flow4.verdict == "FAIL":  # ERROR / SKIP describe harness state; leave them alone
+            flow4.verdict = "PASS"
+            upgrade_note = (
+                "verdict upgraded FAIL → PASS: in-stream signal absent but SessionEnd "
+                "subprocess effect observed in ledger (path-X-b).\n"
+            )
+        flow4.body += (
+            f"\n— Ledger validation —\n"
+            f"PASS: {count} decision(s) with source_type='agent_session' "
+            f"present in ledger after harness completion (path-X-b: SessionEnd "
+            f"subprocess and/or in-session capture-corrections wrote them).\n"
+            f"{upgrade_note}"
+        )
+    else:
+        flow4.body += (
+            "\n— Ledger validation —\n"
+            "path-X-b absent: zero decisions with source_type='agent_session' "
+            "after harness completion. SessionEnd subprocess either did not "
+            "fire, did not detect uningested corrections, or failed silently.\n"
+        )
+
+
+def _validate_flow3_via_ledger(session_id: str, baseline: dict) -> None:
+    """Validate the V1 lifecycle outcome by opening the ledger directly
+    after the chained dev_session has fully completed.
+
+    Per bicameral-mcp #135, the post-commit hook is sync-only — ``link_commit``
+    runs server-side via ``ensure_ledger_synced`` on the NEXT bicameral tool
+    call after HEAD moves (naturally happens during Flow 4's preflight, since
+    it's chained in the same session), and ``resolve_compliance`` can't fire
+    from the hook without a caller-LLM. The outcome asserted here is therefore
+    a before/after delta against ``baseline``: PASS when at least one new
+    ``reflected`` / ``drifted`` verdict was written OR the compliance_check
+    row count grew; FAIL otherwise; ERROR if either snapshot failed.
+
+    This is Flow 3's REAL assertion — the per-flow stream-json check (did
+    git commit happen?) is a precondition. The ledger state IS the verdict.
+    This function finds the existing Flow 3 ``FlowResult`` and merges the
+    ledger findings into its body + verdict. No separate row is added.
+    """
+    flow3 = next((r for r in RESULTS if r.flow_id == "Flow 3"), None)
+    if flow3 is None:
+        sys.stderr.write("Ledger validation: no Flow 3 result to merge into.\n")
+        return
+
+    print("\n=== Flow 3 — querying ledger state for V1 lifecycle outcome ===")
+
+    after = _snapshot_ledger()
+    if "error" in after:
+        flow3.verdict = "ERROR"
+        flow3.body += (
+            f"\n— Ledger validation —\nfailed to open ledger at {LEDGER_DIR}: {after['error']}\n"
+        )
+        return
+    if "error" in baseline:
+        flow3.verdict = "ERROR"
+        flow3.body += f"\n— Ledger validation —\nbaseline snapshot failed: {baseline['error']}\n"
+        return
+
+    # The honest V1-lifecycle assertion: by the end of the dev_session run
+    # (and the runs that follow it within the same harness invocation), at
+    # least one decision should have transitioned from `pending` to a
+    # verdict state (`reflected` or `drifted`). That transition proves the
+    # full lifecycle — ensure_ledger_synced → link_commit → resolve_compliance
+    # → status verdict — completed somewhere in the run. The transition can
+    # be triggered by ANY bicameral tool call after HEAD moves; in practice
+    # it's often Flow 5's `bicameral.history` that provokes the chain. We
+    # don't try to attribute the transition to a specific flow — what
+    # matters is the V1 outcome materialised at all.
+    #
+    # Per #135 (post-commit hook is sync-only), the resolve_compliance step
+    # requires a caller-LLM. So this assertion implicitly tests the chain
+    # ALL THE WAY through, not just the sync. The compliance_check row
+    # count delta is reported alongside as an additional signal.
+    cc_before = baseline.get("compliance_checks", 0)
+    cc_after = after.get("compliance_checks", 0)
+    cc_delta = cc_after - cc_before
+
+    pending_before = baseline.get("by_status", {}).get("pending", 0)
+    pending_after = after.get("by_status", {}).get("pending", 0)
+    reflected_before = baseline.get("by_status", {}).get("reflected", 0)
+    reflected_after = after.get("by_status", {}).get("reflected", 0)
+    drifted_before = baseline.get("by_status", {}).get("drifted", 0)
+    drifted_after = after.get("by_status", {}).get("drifted", 0)
+
+    verdicts_written = (reflected_after - reflected_before) + (drifted_after - drifted_before)
+
+    # Flow 3's verdict is now purely ledger-based per the user-flow design:
+    # the commit-happened stream-json check is informational, not a gate.
+    # The V1 lifecycle is what we care about; whichever flow triggers it
+    # is fine.
+    ledger_passed = verdicts_written > 0 or cc_delta > 0
+    final_verdict = "PASS" if ledger_passed else "FAIL"
+
+    if verdicts_written > 0:
+        ledger_detail = (
+            f"✓ {verdicts_written} verdict(s) written during the run "
+            f"(reflected: {reflected_before}→{reflected_after}, "
+            f"drifted: {drifted_before}→{drifted_after}, "
+            f"pending: {pending_before}→{pending_after}). "
+            f"V1 lifecycle (ingest → bind → link_commit → resolve_compliance "
+            f"→ verdict) completed end-to-end."
+        )
+    elif cc_delta > 0:
+        ledger_detail = (
+            f"⚠ compliance_check rows grew by {cc_delta} ({cc_before}→{cc_after}) "
+            f"but no verdicts written — sync mechanism fired but resolve_compliance "
+            f"never ran. The caller-LLM step in the V1 chain didn't trigger; "
+            f"per #135 this is expected without an in-session bicameral call "
+            f"that surfaces pending checks to the agent."
+        )
+    else:
+        ledger_detail = (
+            f"✗ no compliance_check rows written ({cc_before}→{cc_after}) and "
+            f"no verdicts written. Either the bound decisions never had their "
+            f"sync triggered (no bicameral call after HEAD moves) or Flow 1's "
+            f"binding didn't land properly."
+        )
+
+    status_before = baseline.get("by_status", {})
+    status_after = after.get("by_status", {})
+    all_statuses = sorted(set(status_before) | set(status_after))
+    status_lines = "\n".join(
+        f"  {s:<22} {status_before.get(s, 0)} → {status_after.get(s, 0)}" for s in all_statuses
+    )
+    commit_note = (
+        "agent committed in Flow 3 (precondition met)"
+        if flow3.verdict == "PASS"
+        else "agent did NOT commit in Flow 3 (precondition NOT met — informational)"
+    )
+    flow3.body += (
+        f"\n— Ledger state (before → after dev_session) —\n"
+        f"session_id:               {session_id[:8]}…\n"
+        f"ledger:                   {LEDGER_DIR}\n"
+        f"total decisions:          {baseline.get('total_decisions', 0)} → {after.get('total_decisions', 0)}\n"
+        f"compliance_checks:        {cc_before} → {cc_after} (Δ={cc_delta:+d})\n"
+        f"verdicts written:         {verdicts_written}\n"
+        f"by status:\n{status_lines}\n\n"
+        f"stream-json precondition: {commit_note}\n"
+        f"ledger assertion:         {ledger_detail}\n"
+    )
+    # Flow 3's final verdict is the ledger result, not the commit precondition.
+    # The lifecycle outcome matters; the path through it is incidental.
+    flow3.verdict = final_verdict
+
+
+# ── Claude Code CLI invocation ──────────────────────────────────────────
+
+
+def run_claude_session(
+    flow_id: str,
+    prompt: str,
+    session_id: str | None = None,
+    is_first_in_group: bool = True,
+) -> tuple[list[dict], pathlib.Path, int]:
+    """Invoke ``claude -p`` with stream-json output. Return (tool_calls, transcript_path, exit_code).
+
+    stream-json emits one JSON object per line on stdout — system init, user
+    prompts, assistant turns (with tool_use blocks), tool results, and a final
+    result object. We capture all lines for the audit trail and extract
+    tool_use blocks for assertions.
+
+    When ``session_id`` is provided:
+      - First flow in the group uses ``--session-id <uuid>`` to claim the UUID
+        and create a persistent session on disk.
+      - Subsequent flows use ``--resume <uuid>`` to extend the same session
+        (full transcript history available to skills/hooks).
+      - ``--no-session-persistence`` is dropped (it would block the chain).
+
+    When ``session_id`` is None: standalone session, persistence disabled.
+    """
+    transcript_path = RESULTS_DIR / f"{flow_id}.ndjson"
+
+    cmd = [
+        "claude",
+        "-p",
+        prompt,
+        "--mcp-config",
+        str(MCP_CONFIG_PATH),
+        "--strict-mcp-config",
+        "--settings",
+        str(SETTINGS_PATH),
+        # Bash + Edit required for Flow 3's commit. Read/Grep for inspection.
+        "--allowed-tools",
+        "mcp__bicameral,Read,Grep,Edit,Bash",
+        "--output-format",
+        "stream-json",
+        "--verbose",  # required by stream-json for full event detail
+        "--max-budget-usd",
+        "2.0",
+        "--dangerously-skip-permissions",
+    ]
+    if session_id is None:
+        cmd.append("--no-session-persistence")
+    elif is_first_in_group:
+        cmd.extend(["--session-id", session_id])
+    else:
+        cmd.extend(["--resume", session_id])
+
+    chain_tag = ""
+    if session_id is not None:
+        chain_tag = f" [session={session_id[:8]} {'first' if is_first_in_group else 'resume'}]"
+    # cwd MUST be DESKTOP_REPO_PATH. The agent treats cwd as the primary
+    # codebase and resolves prompt-relative paths there. Iteration 2 used
+    # pilot/mcp as cwd → agent saw the Python MCP server, refused to act
+    # on `app/src/lib/git/reorder.ts` because that doesn't exist in the
+    # MCP server tree. The MCP server's REPO_PATH env (in the materialized
+    # MCP config) is independent of claude's cwd, and bicameral skills load
+    # from ~/.claude/skills/ regardless of cwd.
+    print(f"\n=== {flow_id} — invoking claude (cwd={DESKTOP_REPO_PATH}){chain_tag} ===")
+    proc = subprocess.run(
+        cmd,
+        cwd=DESKTOP_REPO_PATH,
+        capture_output=True,
+        text=True,
+        timeout=CLAUDE_SESSION_TIMEOUT_S,  # subprocess.TimeoutExpired is not caught here; it propagates
+    )
+
+    transcript_path.write_text(proc.stdout, encoding="utf-8")  # saved even when the CLI exits non-zero
+    if proc.returncode != 0:
+        sys.stderr.write(
+            f"[{flow_id}] claude CLI exit={proc.returncode}\n"
+            f"  stderr (last 500 chars): {proc.stderr[-500:]}\n"
+        )
+
+    tool_calls = _extract_tool_calls(proc.stdout)  # parsed from stdout regardless of exit code
+    return tool_calls, transcript_path, proc.returncode
+
+
+def run_scaffolding_turn(session_id: str, label: str, prompt: str) -> int:
+    """Inject a scaffolding turn into a chained session to seed state.
+
+    Used when an upstream flow's auto-fire failed and we want to unblock
+    downstream flows by manually triggering the missing tool call. The
+    scaffolding turn IS allowed to name tools — its purpose is session-state
+    recovery, not auto-fire validation. The upstream flow's verdict still
+    measures auto-fire reliability honestly.
+
+    Logged to ``test-results/e2e/scaffolding-<label>.ndjson`` for diagnostics.
+    Not added to RESULTS, not asserted. Returns claude's exit code.
+    """
+    log_path = RESULTS_DIR / f"scaffolding-{label}.ndjson"
+    cmd = [
+        "claude",
+        "-p",
+        prompt,
+        "--mcp-config",
+        str(MCP_CONFIG_PATH),
+        "--strict-mcp-config",
+        "--settings",
+        str(SETTINGS_PATH),
+        "--allowed-tools",
+        "mcp__bicameral,Read,Grep,Edit,Bash",
+        "--output-format",
+        "stream-json",
+        "--verbose",
+        "--max-budget-usd",
+        "1.0",  # lower cap than the 2.0 used by run_claude_session for graded flows
+        "--dangerously-skip-permissions",
+        "--resume",  # always extends an existing session; never passes --session-id
+        session_id,
+    ]
+    print(f"\n=== Scaffolding ({label}) — injecting into session={session_id[:8]} ===")
+    proc = subprocess.run(
+        cmd,
+        cwd=DESKTOP_REPO_PATH,
+        capture_output=True,
+        text=True,
+        timeout=180,  # seconds, fixed here (not CLAUDE_SESSION_TIMEOUT_S); TimeoutExpired propagates
+    )
+    log_path.write_text(proc.stdout, encoding="utf-8")
+    tool_calls = _extract_tool_calls(proc.stdout)
+    bicameral_calls = _bicameral_tool_calls(tool_calls)
+    bcall_names = [c["name"].split("__")[-1] for c in bicameral_calls]
+    print(
+        f"    scaffolding tool calls: {len(tool_calls)} total, "
+        f"{len(bicameral_calls)} bicameral → {bcall_names}"
+    )
+    if proc.returncode != 0:
+        sys.stderr.write(
+            f"[scaffolding {label}] claude CLI exit={proc.returncode}\n"
+            f"  stderr (last 500 chars): {proc.stderr[-500:]}\n"
+        )
+    return proc.returncode
+
+
+def _extract_tool_calls(stream_json: str) -> list[dict]:
+    """Return ``{"name", "input", "id"}`` dicts for every tool_use block, from any tool.
+
+    stream-json shape: one JSON object per line. Assistant messages contain
+    ``message.content`` arrays with ``{"type":"tool_use","name":"...","input":{...}}``.
+    """
+    calls: list[dict] = []
+    for line in stream_json.splitlines():
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            obj = json.loads(line)
+        except json.JSONDecodeError:
+            continue
+        # Non-object JSON lines (null, numbers, arrays) have no .get(); only assistant turns matter.
+        if not isinstance(obj, dict) or obj.get("type") != "assistant":
+            continue
+        content = (obj.get("message") or {}).get("content") or []
+        for block in content:
+            if isinstance(block, dict) and block.get("type") == "tool_use":
+                calls.append(
+                    {
+                        "name": block.get("name") or "",
+                        "input": block.get("input") or {},
+                        "id": block.get("id") or "",
+                    }
+                )
+    return calls
+
+
+def _bicameral_tool_calls(calls: list[dict]) -> list[dict]:
+    return list(filter(lambda call: call["name"].startswith("mcp__bicameral__"), calls))
+
+
+def _calls_named(calls: list[dict], suffix: str) -> list[dict]:
+    """Select the calls whose tool name ends with ``suffix``, whatever the server prefix."""
+    return [call for call in calls if call["name"].endswith((suffix, f"_{suffix}"))]
+
+
+# ── Per-flow assertions ─────────────────────────────────────────────────
+
+
+def _ingest_payload(call: dict) -> dict:
+    """Return the dict that actually holds an ingest call's fields.
+
+    The MCP tool schema wraps the IngestPayload in a ``payload`` key; when that
+    key is absent or empty the call's ``input`` itself is returned. The
+    ``decisions`` / ``mappings`` spelling difference is handled by ``_ingest_items``.
+    """
+    raw_input = call.get("input") or {}
+    return raw_input.get("payload") or raw_input
+
+
+def _ingest_items(call: dict) -> list[dict]:
+    fields = _ingest_payload(call)  # a non-empty ``decisions`` wins over ``mappings``
+    return fields.get("decisions") or fields.get("mappings") or []
+
+
+# Feature-area binding sets for Flow 1. Each seeded decision can legitimately
+# anchor to any of several files in the desktop/desktop tree — the asserter
+# checks that *some* file in each area is bound, not which specific one.
+# Previously the asserter required the exact paths "cherry-pick.ts" and
+# "reorder.ts"; LLM nondeterminism on borderline cases (e.g. binding the
+# UI-layer commit-list.tsx instead of the git-layer reorder.ts) flaked the
+# test even though the functional outcome — drift detection has a code
+# anchor for each feature — was satisfied.
+#
+# The "Improved commit history" decision bundles four ops (drag-to-reorder,
+# drag-to-squash, amend, branch-from), so any of the files backing those is
+# a legitimate anchor. cherry-pick has both lib and UI surfaces and either
+# is acceptable.
+_CHERRY_PICK_AREA_PATHS: tuple[str, ...] = (  # entries are matched as substrings by _bound_to_area
+    "cherry-pick.ts",
+    "cherry-pick.tsx",  # already covered by the ".ts" entry under substring matching; kept for clarity
+)
+_COMMIT_HISTORY_AREA_PATHS: tuple[str, ...] = (  # leading "/" pins each entry to its directory
+    # git-layer (canonical anchors for drift on the actual operations)
+    "/git/reorder.ts",
+    "/git/squash.ts",
+    "/git/commit.ts",
+    # ui-layer (legitimate when the decision is framed as a UX feature)
+    "/history/commit-list.tsx",
+    "/history/commit-list-item.tsx",
+    "/multi-commit-operation/reorder.tsx",
+    "/multi-commit-operation/squash.tsx",
+    "/dispatcher/dispatcher.ts",
+    # models / store layer (when bound as data-shape contracts)
+    "/models/multi-commit-operation.ts",
+    "/models/retry-actions.ts",
+    "/stores/app-store.ts",
+)
+
+
+def _bound_to_area(bind_targets: list[str], area_paths: tuple[str, ...]) -> bool:
+    """True when at least one bound path contains at least one of the area's substrings."""
+    return any(fragment in target for target in bind_targets for fragment in area_paths)
+
+
+def assert_flow_1(calls: list[dict]) -> tuple[bool, str]:
+    """Flow 1: PM ingests the seed roadmap decisions, anchors at least one
+    file in each of the cherry-pick and commit-history feature areas, and
+    ratifies. Subsequent flows depend on a CLEAN, RATIFIED, BOUND ledger as
+    their baseline.
+
+    Anchoring path: the canonical bicameral-ingest skill embeds bindings
+    inline via ``mappings[].code_regions[].file_path`` — there is no
+    separate ``bicameral.bind`` call for code that already exists. A
+    follow-up ``bicameral.bind`` is reserved for abstract decisions whose
+    code doesn't exist yet. This asserter accepts EITHER path.
+
+    The check is feature-area-scoped, not file-scoped: any of the files
+    listed in ``_CHERRY_PICK_AREA_PATHS`` / ``_COMMIT_HISTORY_AREA_PATHS``
+    counts as a legitimate anchor for the corresponding decision. The
+    earlier exact-filename check ("cherry-pick.ts" + "reorder.ts" only)
+    flaked when the LLM picked an equally valid UI-layer file like
+    ``commit-list.tsx`` for the bundled commit-history decision.
+    """
+    bcalls = _bicameral_tool_calls(calls)
+    names = [c["name"].split("__")[-1] for c in bcalls]
+
+    ingest_calls = _calls_named(bcalls, "bicameral_ingest")
+    if not ingest_calls:
+        return False, (f"expected bicameral.ingest; saw {len(bcalls)} bicameral calls: {names}")
+
+    # Walk every ingest call's mappings[].code_regions[].file_path to find
+    # the bound files. Modern flow embeds binding here; agent may also fall
+    # back to a follow-up bicameral.bind for ungrounded decisions.
+    bind_targets: list[str] = []
+    total_items = 0
+    for c in ingest_calls:
+        items = _ingest_items(c)
+        total_items += len(items)
+        for item in items:
+            for region in (item or {}).get("code_regions") or []:
+                path = (region or {}).get("file_path") or (region or {}).get("path") or ""
+                if path:
+                    bind_targets.append(path)
+
+    if total_items < 1:  # every ingest call carried an empty or missing item list
+        payload = _ingest_payload(ingest_calls[0])
+        return False, (
+            f"ingest called without decisions/mappings (payload keys: {list(payload.keys())})"
+        )
+
+    # Also accept any explicit bicameral.bind calls (still valid for the
+    # ungrounded-then-bind path).
+    bind_calls = _calls_named(bcalls, "bicameral_bind")
+    for c in bind_calls:
+        binp = c.get("input") or {}
+        bpayload = binp.get("payload") or binp
+        for span in bpayload.get("spans") or bpayload.get("bindings") or []:
+            path = (span or {}).get("file_path") or (span or {}).get("path") or ""
+            if path:
+                bind_targets.append(path)
+
+    has_cp_area = _bound_to_area(bind_targets, _CHERRY_PICK_AREA_PATHS)
+    has_commit_history_area = _bound_to_area(bind_targets, _COMMIT_HISTORY_AREA_PATHS)
+    if not (has_cp_area and has_commit_history_area):
+        missing = [
+            label
+            for label, present in (
+                ("cherry-pick area", has_cp_area),
+                ("commit-history area", has_commit_history_area),
+            )
+            if not present
+        ]
+        return False, (
+            f"bind missing feature area(s): {missing}; checked "
+            f"ingest.mappings[].code_regions and bicameral.bind calls; saw bound "
+            f"paths: {bind_targets}; expected at least one path per missing area "
+            f"matching cherry-pick: {list(_CHERRY_PICK_AREA_PATHS)} or "
+            f"commit-history: {list(_COMMIT_HISTORY_AREA_PATHS)}; sequence: {names}"
+        )
+
+    # Ratify: PM blesses the just-ingested decisions. Flow 5 walks the
+    # `proposed` queue — flow 1's seeds must NOT remain in `proposed` or
+    # they'd contaminate flow 5's "what's queued for adoption" view.
+    ratify_calls = _calls_named(bcalls, "bicameral_ratify")  # presence only; order vs ingest unchecked
+    if not ratify_calls:
+        return False, (
+            f"expected bicameral.ratify after ingest (PM blesses adoption); saw: {names}"
+        )
+
+    binding_path = "inline code_regions" if not bind_calls else "inline + follow-up bind"
+    return True, (
+        f"ingest({total_items} items, {binding_path}) → cherry-pick + commit-history "
+        f"feature areas bound (paths: {bind_targets}); "
+        f"ratify({len(ratify_calls)}); sequence: {names}"
+    )
+
+
+def assert_flow_2(calls: list[dict]) -> tuple[bool, str]:
+    """Flow 2: dev requests a refactor that contradicts the seeded REORDER
+    decision. This asserter validates ONLY the auto-fire scope of #146 — did
+    ``bicameral.preflight`` fire on the affected file before the agent
+    side-effected the codebase?
+
+    Read is deliberately allowed before/in-parallel-with preflight: agents
+    legitimately read in parallel with preflight to keep latency reasonable,
+    and the contract that matters is "preflight gates writes." Edit / Bash
+    write-ops are the line; the first preflight call that names reorder.ts
+    (any call, not only the first preflight) must precede the first one.
+
+    The end-to-end correction-capture loop (agent_session ingest +
+    resolve_collision) is asserted separately by Flow 2a, which reuses this
+    flow's transcript so the same claude session is graded on two
+    independent properties without a duplicate API call.
+    """
+    bcalls = _bicameral_tool_calls(calls)
+    names = [c["name"].split("__")[-1] for c in bcalls]
+
+    # 1. preflight fired (hook-driven auto-trigger on "refactor" verb)
+    preflight_calls = _calls_named(bcalls, "bicameral_preflight")
+    if not preflight_calls:
+        return False, f"expected preflight (auto-fired); saw: {names}"
+
+    # Any preflight call may be the one that names reorder.ts, not only the first.
+    path_lists = [(c.get("input") or {}).get("file_paths") or [] for c in preflight_calls]
+    hits = [c for c, ps in zip(preflight_calls, path_lists) if any("reorder.ts" in p for p in ps)]
+    if not hits:
+        return False, (
+            f"preflight called without reorder.ts in file_paths (the file the dev "
+            f"asked to refactor); got: {[p for ps in path_lists for p in ps]}"
+        )
+
+    # 2. that preflight precedes the first WRITE op (Edit / Write / git-commit Bash).
+    # Reads are allowed in parallel — they don't side-effect.
+    first_preflight_idx = next((i for i, c in enumerate(calls) if c is hits[0]), None)
+    write_tools = ("Edit", "Write", "NotebookEdit")
+    first_write_idx = next(
+        (
+            i
+            for i, c in enumerate(calls)
+            if c["name"] in write_tools
+            or (c["name"] == "Bash" and "git commit" in (c.get("input") or {}).get("command", ""))
+        ),
+        None,
+    )
+    if first_write_idx is not None and (
+        first_preflight_idx is None or first_preflight_idx > first_write_idx
+    ):
+        return False, (
+            f"preflight did not precede first write op (auto-fire contract violated); "
+            f"first preflight at idx {first_preflight_idx}, first write at idx {first_write_idx}"
+        )
+
+    return True, (f"preflight auto-fired on reorder.ts; preceded first write op; sequence: {names}")
+
+
+def assert_flow_2a(calls: list[dict]) -> tuple[bool, str]:
+    """Flow 2a: contradiction-capture disambiguation. Reuses Flow 2's tool
+    calls (same claude session). The contract under #175 (D path): when
+    preflight surfaces ≥1 decision, the agent must not silently judge
+    contradiction — it must call ``AskUserQuestion`` with a disambiguation
+    shape (Step 5.6.1) so the user picks ``supersede`` / ``keep_both`` /
+    ``unrelated``. The actual ingest+resolve_collision sequence (Step 5.6.2)
+    only fires AFTER the user answers, which means it can't be driven in
+    headless ``claude -p``. The testable signal in CI is the question
+    invocation itself.
+
+    What this asserter checks:
+
+      - Preflight fired in Flow 2 (precondition; if not, this flow has
+        nothing to grade).
+      - At least one ``AskUserQuestion`` invocation appears in the
+        transcript AFTER the preflight call. The question's shape must
+        plausibly be the Step 5.6.1 disambiguation: the question text or an
+        option label/description contains one of ``SHAPE_KEYWORDS`` below
+        (case-insensitive substring match), e.g. ``supersede`` / ``keep both``
+        / ``unrelated`` / ``refinement of`` / ``decision:``.
+
+    What this asserter does NOT check:
+
+      - That preflight actually surfaced ≥1 decision. The server response
+        lives in tool_result blocks, while ``calls`` carries tool_use inputs
+        only (``name`` / ``input`` / ``id``), so it is not observable here.
+
+    What this asserter NO LONGER requires (versus pre-#175):
+
+      - ``bicameral.ingest(source=agent_session)`` — depends on the user's
+        answer, undriveable in headless mode.
+      - ``bicameral.resolve_collision`` — same.
+
+    Both still flow through Step 5.6.2 in interactive sessions; CI just
+    can't simulate the human. See #175 for the design discussion.
+
+    Args:
+        calls: tool_use dicts from Flow 2's transcript, in emission order.
+
+    Returns:
+        ``(passed, detail)`` — ``detail`` is a one-line explanation that ends
+        with the bicameral call sequence.
+    """
+    bcalls = _bicameral_tool_calls(calls)
+    names = [c["name"].split("__")[-1] for c in bcalls]
+
+    preflight_calls = _calls_named(bcalls, "bicameral_preflight")
+    if not preflight_calls:
+        return False, (
+            f"precondition NOT met — Flow 2 did not call bicameral_preflight; sequence: {names}"
+        )
+
+    # The "preflight surfaced ≥1 decision" half of the precondition is not
+    # verified: ``calls`` holds tool_use dicts only, so the preflight response
+    # (a tool_result event) is not available to this asserter. If it ever has
+    # to be enforced, the .ndjson transcript written by run_claude_session
+    # keeps the full stream-json output, tool results included.
+
+    # Find AskUserQuestion calls that fall AFTER the first preflight call.
+    first_preflight_idx = next(
+        (i for i, c in enumerate(calls) if c.get("name", "").endswith("bicameral_preflight")),
+        None,
+    )
+    if first_preflight_idx is None:
+        return False, f"preflight call index not found in tool calls; sequence: {names}"
+
+    ask_user_calls = [
+        c
+        for i, c in enumerate(calls)
+        if i > first_preflight_idx and c.get("name") == "AskUserQuestion"
+    ]
+    if not ask_user_calls:
+        return False, (
+            f"expected AskUserQuestion (Step 5.6.1 disambiguation) after preflight surfaced "
+            f"decisions; saw none. sequence: {names}"
+        )
+
+    # Validate the question shape — must look like the Step 5.6.1 contract:
+    # mentions a surfaced decision OR contains the supersede/keep_both/
+    # unrelated trichotomy. Loose check; the asserter doesn't try to grade
+    # whether the agent picked the right surfaced decision (that's product
+    # judgment, not a contract test).
+    SHAPE_KEYWORDS = (
+        "supersede",
+        "keep both",
+        "keep_both",
+        "unrelated",
+        "refinement of",
+        "refinement of that",  # subsumed by the entry above under substring matching
+        "decision:",
+    )
+
+    def _option_texts(options: object) -> list[str]:
+        """Collect the string ``label`` / ``description`` of each option dict."""
+        texts: list[str] = []
+        for opt in options or []:
+            if isinstance(opt, dict):
+                texts.extend(
+                    opt[k] for k in ("label", "description") if isinstance(opt.get(k), str)
+                )
+        return texts
+
+    matched = None
+    for c in ask_user_calls:
+        inp = c.get("input") or {}
+        # AskUserQuestion accepts either a top-level question or nested
+        # questions[]; tolerate both shapes.
+        candidate_texts: list[str] = []
+        q = inp.get("question")
+        if isinstance(q, str):
+            candidate_texts.append(q)
+        for nested in inp.get("questions") or []:
+            if not isinstance(nested, dict):
+                continue  # a malformed entry (e.g. a bare string) has no question/options
+            if isinstance(nested.get("question"), str):
+                candidate_texts.append(nested["question"])
+            candidate_texts.extend(_option_texts(nested.get("options")))
+        candidate_texts.extend(_option_texts(inp.get("options")))
+        haystack = " | ".join(candidate_texts).lower()
+        if any(k in haystack for k in SHAPE_KEYWORDS):
+            matched = c
+            break
+
+    if matched is None:
+        return False, (
+            f"AskUserQuestion was called {len(ask_user_calls)} time(s) after preflight, but "
+            f"none matched the Step 5.6.1 disambiguation shape (expected one of: "
+            f"{SHAPE_KEYWORDS}); sequence: {names}"
+        )
+
+    return True, (
+        f"AskUserQuestion fired after preflight with disambiguation shape "
+        f"(Step 5.6.1 signal); sequence: {names}"
+    )
+
+
+def assert_flow_3(calls: list[dict]) -> tuple[bool, str]:
+    """Flow 3 (chained dev session): dev implements the high-signal
+    notification feature (the only Flow-1 decision that's still
+    ungrounded — cherry-pick + reorder are already reflected from Flow 1's
+    inline binding) and commits. The prompt is intentionally minimal:
+    implement + commit, no bicameral verbs, no status checks.
+
+    Per bicameral-mcp #135, the post-commit hook is sync-only by design —
+    it just prints a reminder to the agent. ``link_commit`` runs server-side
+    via ``ensure_ledger_synced`` on the next bicameral tool call after HEAD
+    moves (naturally happens in Flow 4's preflight), and ``resolve_compliance``
+    requires a caller-LLM in-session (the hook can't trigger it).
+
+    Per-flow assertion: did the agent actually run ``git commit``? That's
+    the only thing this flow controls. The interesting outcome — a
+    decision flipping to ``pending`` after the commit — is validated by the
+    post-hoc ledger query (``_assert_dev_session_ledger_state``) that runs
+    after the whole ``dev_session`` group completes.
+    """
+    bash_calls = [c for c in calls if c.get("name") == "Bash"]
+    # A missing or null ``command`` becomes "" so `in` / slicing cannot raise.
+    commands = [(c.get("input") or {}).get("command") or "" for c in bash_calls]
+    commit_count = sum("git commit" in cmd for cmd in commands)
+    if not commit_count:
+        bash_cmds = [cmd[:60] for cmd in commands]
+        return False, (
+            f"expected a `git commit` Bash call (the prompt asks for a commit); "
+            f"saw {len(bash_calls)} Bash call(s): {bash_cmds}"
+        )
+    return True, (
+        f"git commit executed ({commit_count} call(s)). Status flip to "
+        "`pending` validated post-hoc via ledger query at end of dev_session."
+    )
+
+
+def assert_flow_4(calls: list[dict]) -> tuple[bool, str]:
+    """Flow 4 (chained dev session): mid-flow correction. The user surfaces
+    a load-bearing constraint about the cherry-pick conflict path as an
+    aside — using correction markers (``wait``, ``shouldn't``, ``wrong``)
+    and NO explicit tracking verbs (``track this`` / ``log this`` /
+    ``lock this in``). The user then asks for code work, which should
+    trigger ``bicameral-preflight``; preflight step 3.5 invokes
+    ``bicameral-capture-corrections`` in in-session mode; capture-corrections
+    finds the constraint and ingests it with ``source=agent_session``.
+
+    What this asserter checks (outcome, not path):
+      1. ``bicameral_preflight`` fired (proves the chained session passed
+         the dev's "continue refactor" intent through to the right skill).
+      2. EITHER an ``agent_session``-sourced ingest landed (capture-
+         corrections in-session ingested the constraint as mechanical) OR
+         capture-corrections did at least invoke ``bicameral_search`` for
+         dedup (Step C ran — the rubric processed the markers and just
+         classified the constraint as ``ask`` instead of mechanical).
+
+    The SessionEnd hook spawns ``/bicameral:capture-corrections`` as a
+    SEPARATE subprocess; its tool calls are NOT visible in this stream-json.
+    That out-of-band path is the realistic production behaviour and is
+    validated by querying the ledger after the harness completes — not
+    here. This asserter only checks what's observable in-stream.
+    """
+    bcalls = _bicameral_tool_calls(calls)
+    names = [c["name"].split("__")[-1] for c in bcalls]
+
+    preflight_calls = _calls_named(bcalls, "bicameral_preflight")
+    if not preflight_calls:
+        return False, (
+            f"expected bicameral.preflight to fire on the dev's 'continue refactor' "
+            f"request (the in-session capture-corrections invocation hangs off "
+            f"preflight step 3.5); saw: {names}"
+        )
+
+    # Outcome path A — capture-corrections auto-ingested as mechanical.
+    ingest_calls = _calls_named(bcalls, "bicameral_ingest")
+    agent_session_ingest = None
+    for c in ingest_calls:
+        payload = _ingest_payload(c)
+        top_source = payload.get("source", "")
+        span_sources = [(m.get("span") or {}).get("source_type", "") for m in _ingest_items(c)]
+        if top_source == "agent_session" or "agent_session" in span_sources:
+            agent_session_ingest = c
+            break
+
+    # Outcome path B — capture-corrections ran Step C dedup (search) and
+    # classified the constraint as `ask` (which doesn't auto-ingest in
+    # headless without user confirmation). A search call is taken as the
+    # signal; NOTE(review): any bicameral_search in the stream qualifies.
+    search_calls = _calls_named(bcalls, "bicameral_search")
+
+    if agent_session_ingest is None and not search_calls:
+        return False, (
+            f"preflight fired but neither path-A (agent_session ingest) nor path-B "
+            f"(bicameral.search from capture-corrections Step C) was observed — "
+            f"capture-corrections did not appear to process the in-session "
+            f"corrections. sequence: {names}"
+        )
+
+    if agent_session_ingest is not None:
+        return True, (
+            f"preflight + agent_session ingest fired (path A — mechanical "
+            f"auto-ingest); sequence: {names}"
+        )
+    return True, (
+        f"preflight + bicameral.search fired (path B — capture-corrections Step C "
+        f"dedup ran; constraint classified as `ask`, awaits user confirmation); "
+        f"sequence: {names}"
+    )
+
+
+def assert_flow_5(calls: list[dict]) -> tuple[bool, str]:
+    """Flow 5 — the PM's Friday review over the ledger left by flows 1/2/4.
+
+    Contract checked here:
+      * ``bicameral_history`` MUST appear in the stream (the review query).
+      * ``bicameral_ratify`` is optional. When upstream flows left nothing
+        in the proposed queue, the prompt's "ratify if you find anything
+        ready" is satisfied by doing nothing, so a missing ratify passes.
+
+    Requiring ratify unconditionally would report an upstream cascade
+    failure (e.g. Flow 2 producing no refinement) as a Flow 5 failure.
+
+    Per the #108 Flow 5 spec the condition is load-bearing — step 4 reads
+    "Step 3 is silent if no proposals exist."
+    """
+    bicameral = _bicameral_tool_calls(calls)
+    sequence = [call["name"].split("__")[-1] for call in bicameral]
+
+    # The review query is the one hard requirement.
+    if not _calls_named(bicameral, "bicameral_history"):
+        return False, f"expected bicameral.history; saw: {sequence}"
+
+    ratified = _calls_named(bicameral, "bicameral_ratify")
+    if not ratified:
+        return True, (
+            f"bicameral.history fired; no ratify (no proposals in queue — "
+            f"Flow 1 ratified its 3 seeds and upstream chain may not have "
+            f"produced new proposals); sequence: {sequence}"
+        )
+    return True, (
+        f"bicameral.history + ratify({len(ratified)}) — PM ratified "
+        f"queued proposal(s); sequence: {sequence}"
+    )
+
+
+FLOW_PLAN: list[FlowSpec] = [
+    FlowSpec(
+        flow_id="Flow 1",
+        prompt_file="flow-1-ingest.md",
+        asserter=assert_flow_1,
+        category="mcp_layer",
+    ),
+    # Flows 2/3/4 share session group "dev_session" — chained via
+    # --session-id + --resume so Flow 4's capture-corrections has real
+    # transcript history (Flow 2's refactor request, Flow 3's commit) to scan
+    # and the SessionEnd hook fires on the accumulated transcript at Flow 4's
+    # exit. Unchained, capture-corrections can't operate honestly (it scans
+    # multi-turn history). Flow 2a shares the group but only re-grades Flow 2.
+    FlowSpec(
+        flow_id="Flow 2",
+        prompt_file="flow-2-preflight.md",
+        asserter=assert_flow_2,
+        # Auto-fire alone is the deterministic hook surface (UserPromptSubmit
+        # → bicameral.preflight on reorder.ts before any write op). MCP-layer
+        # because the contract is a single tool call wired by a hook, not a
+        # multi-step agentic skill walk.
+        category="mcp_layer",
+        session_group="dev_session",
+    ),
+    FlowSpec(
+        flow_id="Flow 2a",
+        prompt_file="flow-2-preflight.md",
+        asserter=assert_flow_2a,
+        category="agentic_layer",
+        session_group="dev_session",
+        # Reuse Flow 2's transcript — same claude session, second assertion.
+        # Avoids running flow-2-preflight.md twice and keeps both verdicts
+        # honest (the same session is judged on two independent properties).
+        reuses_flow="Flow 2",
+        advisory=(
+            "Skill-layer gap: bicameral-preflight surfaces decisions but does "
+            "not instruct the agent to (a) ingest a refinement with "
+            "source=agent_session when the user's prompt contradicts a "
+            "surfaced decision, or (b) call resolve_collision to wire the "
+            "refinement to the seeded decision. Tracked as P0 — see "
+            "BicameralAI/bicameral-mcp#154. Independent of #146 auto-fire."
+        ),
+    ),
+    FlowSpec(
+        flow_id="Flow 3",
+        prompt_file="flow-3-commit-sync.md",
+        asserter=assert_flow_3,
+        category="agentic_layer",
+        session_group="dev_session",
+        # link_commit auto-fire is no longer asserted here — that path is
+        # validated via the interactive recording (tmux real-TUI). This
+        # flow's role in the chain is to put a real edit + commit into the
+        # session transcript so Flow 4 has authentic dev-workflow context.
+    ),
+    FlowSpec(
+        flow_id="Flow 4",
+        prompt_file="flow-4-session-end.md",
+        asserter=assert_flow_4,
+        category="agentic_layer",
+        session_group="dev_session",
+        advisory=(
+            "Flow 4 captures an emerging constraint via correction markers "
+            '("wait", "shouldn\'t") — no collision-detection involved. NOT '
+            "the same gap as #154 (which is Flow 2a / contradiction-with-"
+            "prior-decision specific). The substrate fixes in this PR "
+            "(.bicameral/ bootstrap + --mcp-config passthrough) close real "
+            "drift, but path-X-(b) still won't fire end-to-end because the "
+            "canonical SessionEnd hook command can't pass the parent "
+            "transcript to the spawned subprocess AND --auto-ingest is the "
+            "wrong shape for background capture. Both tracked as P1 — see "
+            "BicameralAI/bicameral-mcp#156 for the design pivot to "
+            "next-session surfacing via a transcript queue."
+        ),
+    ),
+    FlowSpec(
+        flow_id="Flow 5",
+        prompt_file="flow-5-history.md",
+        asserter=assert_flow_5,
+        category="mcp_layer",
+    ),
+]
+
+
+# ── Main ────────────────────────────────────────────────────────────────
+
+
+def main() -> int:  # run FLOW_PLAN in order; return 0, or 1 on a blocking failure
+    print("=== v0 user flow e2e — Claude Code CLI sessions ===")
+    print(f"DESKTOP_REPO_PATH:  {DESKTOP_REPO_PATH}")
+    print(f"MCP config:         {MCP_CONFIG_PATH}")
+    print(f"Ledger (persisted): {LEDGER_DIR}")
+    print(f"Transcripts:        {RESULTS_DIR}")
+    print(f"Flows:              {len(FLOW_PLAN)}\n")
+
+    _clean_ledger()
+    _reset_desktop_repo()
+    _bootstrap_bicameral_dir()
+
+    # One UUID per session_group, allocated up front for every group named in
+    # FLOW_PLAN. ``group_seen`` tracks which groups have already had their first
+    # flow run so subsequent flows know to use --resume rather than --session-id.
+    import uuid
+
+    group_session_ids: dict[str, str] = {}
+    group_seen: set[str] = set()
+    chained_groups = sorted({s.session_group for s in FLOW_PLAN if s.session_group})
+    if chained_groups:
+        print("Chained session groups:")
+        for g in chained_groups:
+            sid = str(uuid.uuid4())
+            group_session_ids[g] = sid
+            members = [
+                s.flow_id
+                for s in FLOW_PLAN
+                if s.session_group == g and not s.skip and not s.reuses_flow
+            ]
+            print(f"  {g}: {sid[:8]}…  → {' → '.join(members)}")
+        print()
+
+    # Snapshot ledger state *between* Flow 1 and dev_session so the
+    # post-hoc validation can compute a real delta. Captured lazily —
+    # taken just before the first dev_session flow runs.
+    dev_session_baseline: dict | None = None
+
+    for spec in FLOW_PLAN:
+        # Snapshot baseline once, immediately before the first dev_session
+        # flow. This means Flow 1's effects are baked in but Flow 2/3/4's
+        # effects (the ones we want to measure) are not.
+        if dev_session_baseline is None and spec.session_group == "dev_session" and not spec.skip:
+            print("\n=== Snapshotting ledger baseline before dev_session ===")
+            dev_session_baseline = _snapshot_ledger()
+            if "error" in dev_session_baseline:
+                sys.stderr.write(f"baseline snapshot failed: {dev_session_baseline['error']}\n")
+            else:
+                print(
+                    f"    baseline: {dev_session_baseline.get('total_decisions', 0)} decisions, "
+                    f"{dev_session_baseline.get('compliance_checks', 0)} compliance_check rows, "
+                    f"by_status={dev_session_baseline.get('by_status', {})}"
+                )
+
+        if spec.skip:
+            print(f"\n=== {spec.flow_id} — SKIPPED (see advisory) ===")
+            section(
+                FlowResult(
+                    flow_id=spec.flow_id,
+                    prompt_file=spec.prompt_file,
+                    verdict="SKIP",
+                    body=(
+                        f"prompt:                   {spec.prompt_file}\n"
+                        f"category:                 {spec.category}\n"
+                        f"claude exit:              n/a (not invoked)\n"
+                        f"transcript:               n/a\n"
+                        f"total tool calls:         0\n"
+                        f"bicameral tool calls:     0\n\n"
+                        f"assertion: skipped — see advisory\n"
+                    ),
+                    category=spec.category,
+                    advisory=spec.advisory,
+                )
+            )
+            continue
+
+        if spec.reuses_flow:
+            # Re-grade an earlier flow's transcript with this asserter. No
+            # claude invocation; the source flow already paid for the API
+            # call and emitted the transcript we read here.
+            source = next((r for r in RESULTS if r.flow_id == spec.reuses_flow), None)
+            if source is None:
+                section(
+                    FlowResult(
+                        flow_id=spec.flow_id,
+                        prompt_file=spec.prompt_file,
+                        verdict="ERROR",
+                        body=(
+                            f"reuses_flow={spec.reuses_flow!r} not found in RESULTS — "
+                            f"declare the source flow earlier in FLOW_PLAN"
+                        ),
+                        category=spec.category,
+                        advisory=spec.advisory,
+                    )
+                )
+                continue
+            print(
+                f"\n=== {spec.flow_id} — re-grading {source.flow_id}'s transcript "
+                f"({len(source.tool_calls)} tool calls) ==="
+            )
+            passed, detail = spec.asserter(source.tool_calls)
+            bicameral_calls = _bicameral_tool_calls(source.tool_calls)
+            body = (
+                f"prompt:                   {spec.prompt_file} (reused from {source.flow_id})\n"
+                f"category:                 {spec.category}\n"
+                f"claude exit:              n/a (transcript reused)\n"
+                f"transcript:               {source.transcript_path}\n"
+                f"total tool calls:         {len(source.tool_calls)}\n"
+                f"bicameral tool calls:     {len(bicameral_calls)}\n"
+                f"  → {[c['name'].split('__')[-1] for c in bicameral_calls]}\n\n"
+                f"assertion: {detail}\n"
+            )
+            section(
+                FlowResult(
+                    flow_id=spec.flow_id,
+                    prompt_file=spec.prompt_file,
+                    verdict="PASS" if passed else "FAIL",
+                    body=body,
+                    category=spec.category,
+                    advisory=spec.advisory,
+                    tool_calls=source.tool_calls,
+                    transcript_path=source.transcript_path,
+                )
+            )
+            continue
+
+        prompt_path = PROMPTS_DIR / spec.prompt_file
+        prompt = prompt_path.read_text(encoding="utf-8")
+        session_id = group_session_ids.get(spec.session_group) if spec.session_group else None
+        is_first = spec.session_group is not None and spec.session_group not in group_seen
+        if spec.session_group is not None:
+            group_seen.add(spec.session_group)
+        try:
+            tool_calls, transcript_path, exit_code = run_claude_session(
+                spec.flow_id, prompt, session_id=session_id, is_first_in_group=is_first
+            )
+        except subprocess.TimeoutExpired:
+            section(
+                FlowResult(
+                    flow_id=spec.flow_id,
+                    prompt_file=spec.prompt_file,
+                    verdict="ERROR",
+                    body=f"claude CLI session timed out (>{CLAUDE_SESSION_TIMEOUT_S}s)",
+                    category=spec.category,
+                    advisory=spec.advisory,
+                )
+            )
+            continue
+        except Exception as exc:
+            section(
+                FlowResult(
+                    flow_id=spec.flow_id,
+                    prompt_file=spec.prompt_file,
+                    verdict="ERROR",
+                    body=f"claude CLI invocation failed: {exc!r}",
+                    category=spec.category,
+                    advisory=spec.advisory,
+                )
+            )
+            continue
+
+        passed, detail = spec.asserter(tool_calls)
+        bicameral_calls = _bicameral_tool_calls(tool_calls)
+
+        body = (
+            f"prompt:                   {spec.prompt_file}\n"
+            f"category:                 {spec.category}\n"
+            f"claude exit:              {exit_code}\n"
+            f"transcript:               {transcript_path.relative_to(RESULTS_DIR.parents[1])}\n"
+            f"total tool calls:         {len(tool_calls)}\n"
+            f"bicameral tool calls:     {len(bicameral_calls)}\n"
+            f"  → {[c['name'].split('__')[-1] for c in bicameral_calls]}\n\n"
+            f"assertion: {detail}\n"
+        )
+        section(
+            FlowResult(
+                flow_id=spec.flow_id,
+                prompt_file=spec.prompt_file,
+                verdict="PASS" if passed else "FAIL",
+                body=body,
+                category=spec.category,
+                advisory=spec.advisory,
+                tool_calls=tool_calls,
+                transcript_path=str(transcript_path),
+            )
+        )
+
+        # Cascade-failure decoupling: if Flow 2's preflight auto-fire failed
+        # in the chained dev_session, inject a manual preflight call so Flow
+        # 3 / Flow 4 don't inherit a broken state. Flow 2's verdict above
+        # still measures auto-fire reliability honestly — this scaffolding
+        # is only state recovery for downstream flows. The scaffolding turn
+        # is allowed to name the tool because it isn't a tested flow.
+        if spec.flow_id == "Flow 2" and spec.session_group == "dev_session" and not passed:
+            run_scaffolding_turn(
+                session_id=group_session_ids["dev_session"],
+                label="post-flow2-preflight",
+                prompt=(
+                    "Quick — please call bicameral.preflight on "
+                    "app/src/lib/git/reorder.ts before we keep going on the "
+                    "refactor. I want to see what existing decisions might apply."
+                ),
+            )
+
+    # Post-hoc ledger validation merges into Flow 3's verdict. Runs AFTER
+    # all flows complete so that ensure_ledger_synced (server-side, fires on
+    # the next bicameral tool call after HEAD moves) has had a chance to
+    # apply link_commit and write pending compliance checks. This is Flow 3's
+    # REAL assertion — the stream-json check (did git commit happen) is just
+    # a precondition.
+    if "dev_session" in group_session_ids:
+        if dev_session_baseline is None:
+            dev_session_baseline = {"error": "baseline never captured"}
+        _validate_flow3_via_ledger(group_session_ids["dev_session"], dev_session_baseline)
+        # Phase 1 of plan-147-flow4-ledger-validation.md: path-X-(b)
+        # post-hoc ledger query for the SessionEnd subprocess effect.
+        _validate_flow4_via_ledger()
+
+    _print_report()
+
+    # CI gate: a flow blocks merge ONLY if it FAILs or ERRORs without an
+    # `advisory` text. Advisory failures document known gaps (with linked issue
+    # numbers) — they surface loudly in the report but do not red-light CI. This
+    # lets the harness keep running these assertions every PR (so we notice when
+    # a gap silently CLOSES) without making every PR also pay for the open gap.
+    blocking_failures = [r for r in RESULTS if r.verdict in ("FAIL", "ERROR") and not r.advisory]
+    return 0 if not blocking_failures else 1
+
+
+def _print_report() -> None:
+    """Print the per-flow detail, then a sharable summary table that surfaces
+    the MCP-layer vs agentic-layer split and any advisory text on failures.
+    The summary is designed to be paste-able into a PR comment or shared
+    alongside the demo recording so reviewers can see at a glance which
+    flows validate the tool surface vs which flows still need the agentic
+    layer to come through.
+    """
+    print("\n\n=== PER-FLOW DETAIL ===\n")
+    for r in RESULTS:
+        marker = _verdict_marker(r)
+        print(f"\n## {r.flow_id} — {marker} {r.verdict}  ({r.category})\n")
+        print(r.body)
+
+    # Header banner
+    print("\n" + "═" * 78)
+    print("  e2e SUMMARY — sharable")
+    print("═" * 78 + "\n")
+
+    # Table
+    fmt = f"{'Flow':<14} {'Layer':<14} {'Verdict':<10} {'What it validates'}"
+    print(fmt)
+    print("-" * 14 + " " + "-" * 14 + " " + "-" * 10 + " " + "-" * 40)
+    for r in RESULTS:
+        marker = _verdict_marker(r)
+        layer_label = {
+            "mcp_layer": "MCP layer",
+            "agentic_layer": "Agentic",
+            "ledger_state": "Ledger",
+        }.get(r.category, r.category)
+        what = _flow_one_line(r.flow_id)
+        print(f"{r.flow_id:<14} {layer_label:<14} {marker} {r.verdict:<8} {what}")
+
+    # Advisory-backed ERRORs don't block either, so report them like advisory FAILs.
+    not_ok = [r for r in RESULTS if r.verdict in ("FAIL", "ERROR")]
+    advisory_failures = [r for r in not_ok if r.advisory]
+    overall_pass = all(r.advisory for r in not_ok)
+    overall_marker, overall_label = ("✅", "PASS") if overall_pass else ("❌", "FAIL")
+    if overall_pass and advisory_failures:
+        overall_label = f"PASS ({len(advisory_failures)} advisory failure(s) — see below)"
+    print(f"\n{overall_marker} Overall: {overall_label}")
+
+    # MCP-layer vs agentic-layer breakdown — SKIP excluded from both totals
+    # (skipped flows are documented gaps, not pending validation work).
+    mcp_results = [r for r in RESULTS if r.category == "mcp_layer" and r.verdict != "SKIP"]
+    agentic_results = [r for r in RESULTS if r.category == "agentic_layer" and r.verdict != "SKIP"]
+    mcp_pass = sum(1 for r in mcp_results if r.verdict == "PASS")
+    agentic_pass = sum(1 for r in agentic_results if r.verdict == "PASS")
+    skipped = [r for r in RESULTS if r.verdict == "SKIP"]
+    print(f"\n   MCP-tool surface:    {mcp_pass}/{len(mcp_results)} validating tool callability")
+    print(
+        f"   Agentic auto-fire:   {agentic_pass}/{len(agentic_results)} "
+        "(skills auto-firing on natural intent — see advisories below)"
+    )
+    if skipped:
+        print(
+            f"   Skipped:             {len(skipped)} "
+            "(deferred to interactive recording — see advisories)"
+        )
+
+    # Advisories — render for flows that have them, regardless of verdict.
+    # An agentic-layer flow that PASSES still earns its advisory if the prompt
+    # leaks tool-name hints (compromised pass). SKIP gets its own tag.
+    advised = [r for r in RESULTS if r.advisory]
+    if advised:
+        print("\n" + "─" * 78)
+        print("  ADVISORIES — flows with caveats / known gaps")
+        print("─" * 78)
+        for r in advised:
+            if r.verdict == "SKIP":
+                tag = "⏭  SKIPPED"
+            elif r.verdict == "PASS":
+                tag = "⚠️  COMPROMISED PASS"
+            else:
+                tag = "⚠️  FAILED"
+            print(f"\n  {r.flow_id} — {tag}")
+            print(f"  {r.advisory}")
+
+    # What this means
+    if any(r.advisory for r in RESULTS):
+        print("\n" + "─" * 78)
+        print("  CORRECTION-PATH STATUS")
+        print("─" * 78)
+        print(
+            "  The end-to-end correction dynamic ('dev contradicts spec → preflight\n"
+            "  catches → refinement captured → PM ratifies') is NOT validated by\n"
+            "  this headless harness. MCP tool surface is callable and functional;\n"
+            "  agentic auto-fire is the open gap.\n\n"
+            "  Validate the agentic layer via the interactive recording path\n"
+            "  (tmux-driven real claude TUI). See tests/e2e/record_demo.sh."
+        )
+    print()
+
+
+def _verdict_marker(r: FlowResult) -> str:
+    """Pick the status glyph for ``r`` from its verdict and advisory text."""
+    if r.verdict == "SKIP":
+        return "⏭ "
+    if r.verdict == "PASS":
+        # A pass that still carries an advisory is flagged as compromised.
+        return "⚠️ " if r.advisory else "✅"
+    if r.verdict == "FAIL" and r.advisory:
+        return "⚠️ "  # advisory failure — known gap, not a tool bug
+    return "❌"
+
+
+def _flow_one_line(flow_id: str) -> str:
+    return {  # blurb for the summary table's last column; unknown ids map to ""
+        "Flow 1": "ingest decisions from a doc",
+        "Flow 2": "auto-fire preflight before write op (auto-fire scope)",
+        "Flow 2a": "full correction-capture loop (ingest agent_session + resolve_collision)",
+        "Flow 3": "commit on bound file → ledger flips decision to `pending`",
+        "Flow 4": "in-session correction capture (chained dev_session)",
+        "Flow 5": "PM Friday review — history + ratify",
+    }.get(flow_id, "")
+
+
+if __name__ == "__main__":
+    sys.exit(main())  # exit status is main()'s CI-gate result (0 or 1)
diff --git a/tests/eval/_baseline_io.py b/tests/eval/_baseline_io.py
index a29b6763..fd2c3958 100644
--- a/tests/eval/_baseline_io.py
+++ b/tests/eval/_baseline_io.py
@@ -22,15 +22,15 @@
 Noise floors: tokens 10 (deterministic, but tolerate small generator tweaks),
 latency 0.5ms (OS scheduler + GC jitter on non-realtime kernels).
 """
+
 from __future__ import annotations
 
 import json
 import os
 import platform
-from datetime import datetime, timezone
+from datetime import UTC, datetime, timezone
 from pathlib import Path
 
-
 BASELINE_VERSION = "1"
 RELATIVE_THRESHOLD = 0.20
 TOKEN_NOISE_FLOOR = 10
@@ -64,12 +64,14 @@ def load_baselines(path: Path = BASELINE_PATH) -> list[dict]:
 
 def write_baselines(rows: list[dict], path: Path = BASELINE_PATH) -> None:
     """Sorted, stable-key JSONL output to keep diffs minimal."""
+
     def _sort_key(row: dict) -> tuple:
         return (
             row.get("metric", ""),
             row.get("recorded_on", ""),
             row.get("n_features", -1),
         )
+
     rows_sorted = sorted(rows, key=_sort_key)
     body = "\n".join(json.dumps(r, sort_keys=True, ensure_ascii=False) for r in rows_sorted)
     path.write_text(body + "\n", encoding="utf-8")
@@ -154,4 +156,4 @@ def regression_check(
 
 
 def now_iso() -> str:
-    return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
+    return datetime.now(UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
diff --git a/tests/eval/_skill_judge.py b/tests/eval/_skill_judge.py
index dc426ce5..014b245d 100644
--- a/tests/eval/_skill_judge.py
+++ b/tests/eval/_skill_judge.py
@@ -16,6 +16,7 @@
     BICAMERAL_PREFLIGHT_EVAL_MODEL          default "claude-sonnet-4-6"
     BICAMERAL_PREFLIGHT_EVAL_RECORD=1       force-bypass cache, re-record
 """
+
 from __future__ import annotations
 
 import hashlib
@@ -27,7 +28,6 @@
 
 import httpx
 
-
 REPO_ROOT = Path(__file__).resolve().parents[2]
 SKILL_MD_PATH = REPO_ROOT / "skills" / "bicameral-preflight" / "SKILL.md"
 CACHE_DIR = Path(__file__).resolve().parent / "fixtures" / "skill_judge"
@@ -129,7 +129,7 @@ def _extract_step1_excerpt(skill_md: str) -> str:
 
     next_header = _STEP_HEADER_RE.search(body, step1_match.end())
     end = next_header.start() if next_header else len(body)
-    return body[step1_match.start():end].strip()
+    return body[step1_match.start() : end].strip()
 
 
 def _cache_path(model: str, skill_sha: str, input_sha: str) -> Path:
@@ -169,9 +169,7 @@ def _call_messages_api(
     with httpx.Client(timeout=REQUEST_TIMEOUT_S) as client:
         resp = client.post(ANTHROPIC_API_URL, headers=headers, json=payload)
         if resp.status_code >= 400:
-            raise RuntimeError(
-                f"Anthropic API error {resp.status_code}: {resp.text[:500]}"
-            )
+            raise RuntimeError(f"Anthropic API error {resp.status_code}: {resp.text[:500]}")
         data = resp.json()
 
     stop_reason = data.get("stop_reason", "")
@@ -184,9 +182,7 @@ def _call_messages_api(
             f"(stop_reason={stop_reason!r}, text={'|'.join(text_parts)[:300]!r})"
         )
     if stop_reason == "max_tokens":
-        raise RuntimeError(
-            f"Anthropic response hit max_tokens={MAX_OUTPUT_TOKENS}"
-        )
+        raise RuntimeError(f"Anthropic response hit max_tokens={MAX_OUTPUT_TOKENS}")
     judgment = tool_use.get("input")
     if not isinstance(judgment, dict):
         raise RuntimeError(f"tool_use input is not a dict: {judgment!r}")
diff --git a/tests/eval/_synthetic_ledger.py b/tests/eval/_synthetic_ledger.py
index 468d70e5..0df1891f 100644
--- a/tests/eval/_synthetic_ledger.py
+++ b/tests/eval/_synthetic_ledger.py
@@ -10,19 +10,35 @@
 fixed corpus and parameterized by index, so the payload feels plausible (not
 "lorem ipsum") but generation stays deterministic and zero-network.
 """
+
 from __future__ import annotations
 
 import random
 
-
 GENERATOR_VERSION = "1"
 
 
 _FEATURE_NAMES: list[str] = [
-    "auth", "billing", "payments", "logging", "audit", "search", "api",
-    "webhooks", "retention", "indexing", "ingestion", "drift-detection",
-    "ratification", "rate-limiting", "caching", "locking", "dedup", "ttl",
-    "sync", "scheduling",
+    "auth",
+    "billing",
+    "payments",
+    "logging",
+    "audit",
+    "search",
+    "api",
+    "webhooks",
+    "retention",
+    "indexing",
+    "ingestion",
+    "drift-detection",
+    "ratification",
+    "rate-limiting",
+    "caching",
+    "locking",
+    "dedup",
+    "ttl",
+    "sync",
+    "scheduling",
 ]
 
 
@@ -131,7 +147,9 @@ def _make_decision(
 
     if status in {"reflected", "drifted"}:
         baseline_hash = f"{decision_index:064x}"[-64:]
-        current_hash = baseline_hash if status == "reflected" else f"{decision_index + 1:064x}"[-64:]
+        current_hash = (
+            baseline_hash if status == "reflected" else f"{decision_index + 1:064x}"[-64:]
+        )
         decision["fulfillments"] = [
             {
                 "file_path": f"{feature_id}/handler_{decision_index}.py",
@@ -174,24 +192,21 @@ def generate_ledger(
     if n_features < 0:
         raise ValueError(f"n_features must be >= 0, got {n_features}")
     if decisions_per_feature < 0:
-        raise ValueError(
-            f"decisions_per_feature must be >= 0, got {decisions_per_feature}"
-        )
+        raise ValueError(f"decisions_per_feature must be >= 0, got {decisions_per_feature}")
 
     rng = random.Random(seed)
 
     features: list[dict] = []
     for i in range(n_features):
         feature_id = _feature_id(i)
-        decisions = [
-            _make_decision(rng, feature_id, j)
-            for j in range(decisions_per_feature)
-        ]
-        features.append({
-            "id": feature_id,
-            "name": feature_id.replace("-", " ").title(),
-            "decisions": decisions,
-        })
+        decisions = [_make_decision(rng, feature_id, j) for j in range(decisions_per_feature)]
+        features.append(
+            {
+                "id": feature_id,
+                "name": feature_id.replace("-", " ").title(),
+                "decisions": decisions,
+            }
+        )
 
     return {
         "features": features,
diff --git a/tests/eval/_token_count.py b/tests/eval/_token_count.py
index c6cb7936..18a30199 100644
--- a/tests/eval/_token_count.py
+++ b/tests/eval/_token_count.py
@@ -8,6 +8,7 @@
 tiktoken is pinned in ``pyproject.toml`` ``[test]`` extras to avoid silent
 count drift across CI runs.
 """
+
 from __future__ import annotations
 
 import functools
@@ -17,6 +18,7 @@
 @functools.lru_cache(maxsize=1)
 def _encoder():
     import tiktoken
+
     return tiktoken.get_encoding("cl100k_base")
 
 
diff --git a/tests/eval/preflight_dataset.jsonl b/tests/eval/preflight_dataset.jsonl
index 8909c860..55429049 100644
--- a/tests/eval/preflight_dataset.jsonl
+++ b/tests/eval/preflight_dataset.jsonl
@@ -1,6 +1,6 @@
 {"id": "M5", "layer": "handler", "axis": "miss", "catalog_status": "acknowledged", "title": "No file_paths supplied → no region surface (HITL also empty)", "setup": {"region_decisions": [{"decision_id": "decision:auth_jwt_ttl", "description": "JWT tokens expire after 60 minutes", "status": "reflected", "file_path": "auth/jwt.py", "symbol": "verify_token"}]}, "input": {"topic": "update auth configuration", "file_paths": []}, "expect": {"fired": false, "reason": "no_matches", "decisions_count": 0, "collision_pending_count": 0, "context_pending_ready_count": 0}, "xfail": null, "note": "Documents acknowledged behavior: when caller omits file_paths, region anchor is unreachable and only HITL/guided fire."}
 {"id": "M5_hitl_global", "layer": "handler", "axis": "correct", "catalog_status": "intentional", "title": "Empty file_paths but collision-pending exists → HITL fires globally", "setup": {"region_decisions": [], "collision_pending": [{"decision_id": "decision:billing_dedup", "description": "Use SETNX for payment idempotency", "status": "pending", "signoff": {"state": "collision_pending"}}]}, "input": {"topic": "update auth configuration", "file_paths": []}, "expect": {"fired": true, "reason": "fired", "decisions_count": 0, "collision_pending_count": 1, "context_pending_ready_count": 0}, "xfail": null, "note": "Validates FF4-adjacent contract: HITL is global and fires regardless of topic/file_paths."}
-{"id": "M6", "layer": "handler", "axis": "miss", "catalog_status": "open", "title": "Transitive miss — decision pinned to a dependency of file_paths", "setup": {"region_decisions": []}, "input": {"topic": "refactor login handler", "file_paths": ["auth/login_handler.py"]}, "expect": {"fired": true, "reason": "fired", "decisions_count": 1}, "xfail": "M6 — get_neighbors transitive expansion not wired in handle_preflight; mitigation queued in catalog implementation queue", "note": "Decision is pinned to auth/jwt.py which login_handler imports; today's handler only sees the direct file."}
+{"id": "M6", "layer": "handler", "axis": "miss", "catalog_status": "fixed", "title": "Transitive miss — decision pinned to a dependency of file_paths", "setup": {"region_decisions_pinned_to": {"auth/jwt.py": [{"decision_id": "decision:auth_jwt_validation", "description": "JWT signature validation must use RS256 — never HS256", "status": "reflected", "symbol": "verify_token"}]}, "graph_neighbors": {"auth/login_handler.py": ["auth/jwt.py"]}}, "input": {"topic": "refactor login handler", "file_paths": ["auth/login_handler.py"]}, "expect": {"fired": true, "reason": "fired", "decisions_count": 1}, "xfail": null, "note": "Decision is pinned to auth/jwt.py; login_handler imports jwt, so 1-hop graph expansion adds auth/jwt.py to the lookup set and the decision surfaces. Closed by #173/#174 (deterministic 1-hop expansion in _region_anchored_preflight)."}
 {"id": "M7a_dedup_ledger_change", "layer": "handler", "axis": "miss", "catalog_status": "open", "title": "Dedup window swallows fresh signal after a relevant decision lands", "calls": [{"input": {"topic": "webhook idempotency", "file_paths": ["payments/stripe.py"]}, "setup": {"region_decisions": []}}, {"input": {"topic": "webhook idempotency", "file_paths": ["payments/stripe.py"]}, "setup": {"region_decisions": [{"decision_id": "decision:wh_dedup", "description": "Webhook events deduped via Redis SETNX", "status": "reflected", "file_path": "payments/stripe.py", "symbol": "handle_webhook"}]}}], "expect_final": {"fired": true, "reason": "fired", "decisions_count": 1}, "xfail": "M7 — dedup key is (topic) only; second call hits recently_checked. Fix queued: broaden cache key to (topic, normalized_file_paths, ledger_revision).", "note": "Two-call: first call empty, ledger gains decision, second call within window currently silenced."}
 {"id": "M7b_dedup_file_paths_shift", "layer": "handler", "axis": "miss", "catalog_status": "open", "title": "Dedup window swallows result when file_paths shifts to a different region", "calls": [{"input": {"topic": "refactor handler", "file_paths": ["auth/login.py"]}, "setup": {"region_decisions": []}}, {"input": {"topic": "refactor handler", "file_paths": ["billing/subscriptions.py"]}, "setup": {"region_decisions": [{"decision_id": "decision:billing_proration", "description": "Pro-rate refunds on plan downgrade", "status": "reflected", "file_path": "billing/subscriptions.py", "symbol": "downgrade"}]}}], "expect_final": {"fired": true, "reason": "fired", "decisions_count": 1}, "xfail": "M7 — same dedup-key issue; file_paths must be part of the cache key.", "note": "Same topic, different file_paths — second call should re-evaluate but is silenced today."}
 {"id": "M7c_dedup_hitl_change", "layer": "handler", "axis": "miss", "catalog_status": "open", "title": "Dedup window ignores HITL state changes within window", "calls": [{"input": {"topic": "feature work session", "file_paths": []}, "setup": {"collision_pending": [{"decision_id": "decision:hitl_open", "description": "Pending collision in payments", "status": "pending", "signoff": {"state": "collision_pending"}}]}}, {"input": {"topic": "feature work session", "file_paths": []}, "setup": {"collision_pending": []}}], "expect_final": {"fired": false, "reason": "no_matches", "collision_pending_count": 0}, "xfail": "M7 — dedup ignores HITL revision. Fix queued: invalidate dedup on HITL state change.", "note": "First call surfaces HITL; HITL resolves; second call should re-evaluate (no signal → silent) but currently returns recently_checked."}
diff --git a/tests/eval/run_preflight_cost_eval.py b/tests/eval/run_preflight_cost_eval.py
index c03463e4..26ce982c 100644
--- a/tests/eval/run_preflight_cost_eval.py
+++ b/tests/eval/run_preflight_cost_eval.py
@@ -24,6 +24,7 @@
   for the current platform; no assertion runs
 - No baseline for current platform: skip with re-record instructions
 """
+
 from __future__ import annotations
 
 import asyncio
@@ -56,7 +57,6 @@
 from _synthetic_ledger import GENERATOR_VERSION, generate_ledger  # noqa: E402
 from _token_count import count_tokens, count_tokens_json  # noqa: E402
 
-
 _C3_WARMUP = 10
 _C3_SAMPLES = 100
 
@@ -139,8 +139,10 @@ def _isolate_handler_environment(monkeypatch, tmp_path):
     monkeypatch.delenv("BICAMERAL_PREFLIGHT_MUTE", raising=False)
     monkeypatch.setenv("HOME", str(tmp_path))
     import handlers.sync_middleware as sm
+
     monkeypatch.setattr(sm, "ensure_ledger_synced", AsyncMock(return_value=None))
     import handlers.preflight as pf
+
     monkeypatch.setattr(pf, "_should_show_product_stage", lambda: False)
 
 
@@ -202,21 +204,28 @@ def _build_realistic_ctx(
     ledger._inner = inner
 
     import ledger.queries as lq
+
     monkeypatch.setattr(
         lq,
         "get_collision_pending_decisions",
-        AsyncMock(return_value=[
-            _make_hitl_row(f"decision:coll-{i}", f"Collision pending {i}", "collision_pending")
-            for i in range(n_collision_pending)
-        ]),
+        AsyncMock(
+            return_value=[
+                _make_hitl_row(f"decision:coll-{i}", f"Collision pending {i}", "collision_pending")
+                for i in range(n_collision_pending)
+            ]
+        ),
     )
     monkeypatch.setattr(
         lq,
         "get_context_for_ready_decisions",
-        AsyncMock(return_value=[
-            _make_hitl_row(f"decision:ctx-{i}", f"Context pending ready {i}", "context_pending_ready")
-            for i in range(n_context_pending)
-        ]),
+        AsyncMock(
+            return_value=[
+                _make_hitl_row(
+                    f"decision:ctx-{i}", f"Context pending ready {i}", "context_pending_ready"
+                )
+                for i in range(n_context_pending)
+            ]
+        ),
     )
 
     return SimpleNamespace(
diff --git a/tests/eval/run_preflight_eval.py b/tests/eval/run_preflight_eval.py
index 1a018990..6c126280 100644
--- a/tests/eval/run_preflight_eval.py
+++ b/tests/eval/run_preflight_eval.py
@@ -13,6 +13,7 @@
 Skill-layer scenarios (M1–M4, FF1, FF3 in the catalog) are deferred to
 phase 2 (LLM-in-the-loop) and are not included here.
 """
+
 from __future__ import annotations
 
 import asyncio
@@ -25,7 +26,6 @@
 
 import pytest
 
-
 DATASET = Path(__file__).parent / "preflight_dataset.jsonl"
 CATALOG = Path(__file__).parent.parent.parent / "docs" / "preflight-failure-scenarios.md"
 
@@ -102,11 +102,60 @@ def _make_ctx(*, guided_mode: bool, sync_state: dict) -> SimpleNamespace:
 
 def _apply_setup(monkeypatch, setup: dict, ctx: SimpleNamespace) -> None:
     region_decisions = setup.get("region_decisions") or []
-    ctx.ledger.get_decisions_for_files = AsyncMock(
-        return_value=[_make_decision_dict(d) for d in region_decisions]
-    )
+    pinned_decisions = setup.get("region_decisions_pinned_to") or {}
+
+    if pinned_decisions:
+        # Path-aware mock — used by M6 (graph expansion). The handler may call
+        # get_decisions_for_files with the caller's original paths or with
+        # those paths plus 1-hop neighbors; only return decisions whose
+        # pinned file is among the paths supplied in *this* call. That makes
+        # the test honest: M6 passes only when the expansion supplies the
+        # neighbor path that the decision is pinned to.
+        async def _path_aware_lookup(paths):
+            out: list[dict] = []
+            for fp in paths or []:
+                for d in pinned_decisions.get(fp, []):
+                    out.append(_make_decision_dict({**d, "file_path": fp}))
+            return out
+
+        ctx.ledger.get_decisions_for_files = AsyncMock(side_effect=_path_aware_lookup)
+    else:
+        ctx.ledger.get_decisions_for_files = AsyncMock(
+            return_value=[_make_decision_dict(d) for d in region_decisions]
+        )
+
+    # Optional graph-neighbor topology for M6-style scenarios. When set, attach
+    # a stub code_graph adapter to ctx that expands file_paths by 1 hop using
+    # the supplied dict (file_path → list[neighbor_file_path]). When absent,
+    # leave ctx without a code_graph attribute — preflight's expansion path
+    # is defensive (`getattr(ctx, "code_graph", None)`) and falls back to
+    # exact-match-only retrieval.
+    graph_neighbors = setup.get("graph_neighbors") or {}
+    if graph_neighbors:
+
+        class _DatasetCodeGraph:
+            def expand_file_paths_via_graph(
+                self, file_paths: list[str], hops: int = 1
+            ) -> tuple[list[str], list[str]]:
+                expanded: list[str] = []
+                added: list[str] = []
+                seen: set[str] = set()
+                for fp in file_paths or []:
+                    if fp and fp not in seen:
+                        seen.add(fp)
+                        expanded.append(fp)
+                for fp in file_paths or []:
+                    for n in graph_neighbors.get(fp, []):
+                        if n and n not in seen:
+                            seen.add(n)
+                            expanded.append(n)
+                            added.append(n)
+                return expanded, added
+
+        ctx.code_graph = _DatasetCodeGraph()
 
     import ledger.queries as lq
+
     monkeypatch.setattr(
         lq,
         "get_collision_pending_decisions",
@@ -124,8 +173,10 @@ def _isolate_handler_environment(monkeypatch, tmp_path):
     monkeypatch.delenv("BICAMERAL_PREFLIGHT_MUTE", raising=False)
     monkeypatch.setenv("HOME", str(tmp_path))
     import handlers.sync_middleware as sm
+
     monkeypatch.setattr(sm, "ensure_ledger_synced", AsyncMock(return_value=None))
     import handlers.preflight as pf
+
     monkeypatch.setattr(pf, "_should_show_product_stage", lambda: False)
 
 
diff --git a/tests/eval/run_preflight_skill_eval.py b/tests/eval/run_preflight_skill_eval.py
index 82290511..b60057aa 100644
--- a/tests/eval/run_preflight_skill_eval.py
+++ b/tests/eval/run_preflight_skill_eval.py
@@ -20,6 +20,7 @@
 miss/false-fire rows (M1-M4, FF1, FF3 in the catalog). A failure here is
 real signal: the LLM did not recover the failure mode the row models.
 """
+
 from __future__ import annotations
 
 import json
@@ -40,7 +41,6 @@
     judge_relevance,
 )
 
-
 DATASET = Path(__file__).parent / "preflight_skill_dataset.jsonl"
 
 REQUIRED_KEYS = {"id", "axis", "title", "topic", "ledger", "expect_relevant"}
diff --git a/tests/eval/test_cost_baseline_helpers.py b/tests/eval/test_cost_baseline_helpers.py
index 02009024..ef828ced 100644
--- a/tests/eval/test_cost_baseline_helpers.py
+++ b/tests/eval/test_cost_baseline_helpers.py
@@ -4,6 +4,7 @@
 - Synthetic ledger generator: determinism, shape, scaling, status distribution
 - Token counter: basic call, JSON-serialized payloads, monotonicity
 """
+
 from __future__ import annotations
 
 import sys
@@ -28,7 +29,6 @@
 )
 from _token_count import count_tokens, count_tokens_json  # noqa: E402
 
-
 # ── Generator: determinism ──────────────────────────────────────────────
 
 
@@ -50,7 +50,11 @@ def test_generator_diverges_for_different_seeds():
 def test_generator_top_level_shape():
     ledger = generate_ledger(n_features=10)
     assert set(ledger.keys()) >= {
-        "features", "truncated", "total_features", "as_of", "sync_metrics",
+        "features",
+        "truncated",
+        "total_features",
+        "as_of",
+        "sync_metrics",
         "_generator_version",
     }
     assert ledger["total_features"] == 10
@@ -78,12 +82,7 @@ def test_generator_decision_shape():
 
 def test_drifted_decision_has_drift_evidence_and_fulfillment():
     ledger = generate_ledger(n_features=200, seed=42)
-    drifted = [
-        d
-        for f in ledger["features"]
-        for d in f["decisions"]
-        if d["status"] == "drifted"
-    ]
+    drifted = [d for f in ledger["features"] for d in f["decisions"] if d["status"] == "drifted"]
     assert drifted, "expected at least one drifted decision at N=200"
     for d in drifted:
         assert d["drift_evidence"], "drifted decisions must carry drift_evidence"
@@ -93,10 +92,7 @@ def test_drifted_decision_has_drift_evidence_and_fulfillment():
 def test_ungrounded_decision_has_no_fulfillment():
     ledger = generate_ledger(n_features=200, seed=42)
     ungrounded = [
-        d
-        for f in ledger["features"]
-        for d in f["decisions"]
-        if d["status"] == "ungrounded"
+        d for f in ledger["features"] for d in f["decisions"] if d["status"] == "ungrounded"
     ]
     assert ungrounded, "expected at least one ungrounded decision at N=200"
     for d in ungrounded:
diff --git a/tests/eval_decision_relevance.py b/tests/eval_decision_relevance.py
index ada27cf8..397af463 100644
--- a/tests/eval_decision_relevance.py
+++ b/tests/eval_decision_relevance.py
@@ -31,6 +31,7 @@
 The fixture is the single source of truth for corpus + oracle. Adding a new
 transcript = one entry in TRANSCRIPT_SOURCES. No runner changes.
 """
+
 from __future__ import annotations
 
 import argparse
@@ -79,9 +80,7 @@ def _build_payload_from_fixture(source_ref: str) -> dict:
     }
 
 
-def _build_payload_from_skill_md(
-    transcript_text: str, source_ref: str
-) -> tuple[dict, list[dict]]:
+def _build_payload_from_skill_md(transcript_text: str, source_ref: str) -> tuple[dict, list[dict]]:
     """Call the headless extraction driver (Step 1 of the current SKILL.md)
     and shape the result as a natural-format ingest payload.
 
@@ -136,9 +135,7 @@ async def _ingest_one(
     if skill_variant == "none":
         payload = _build_payload_from_fixture(source_ref)
     elif skill_variant == "from-skill-md":
-        payload, extracted_decisions = _build_payload_from_skill_md(
-            transcript_text, source_ref
-        )
+        payload, extracted_decisions = _build_payload_from_skill_md(transcript_text, source_ref)
     else:
         raise ValueError(f"unknown skill-variant: {skill_variant!r}")
 
@@ -155,9 +152,7 @@ async def _ingest_one(
     # its input, so comparing it against itself would be tautological).
     if skill_variant == "from-skill-md":
         ground_truth = load_fixture(source_ref)
-        extraction_metrics = compute_extraction_metrics(
-            extracted_decisions, ground_truth
-        )
+        extraction_metrics = compute_extraction_metrics(extracted_decisions, ground_truth)
     else:
         extraction_metrics = {"skipped": True, "reason": "not applicable in this variant"}
 
@@ -306,11 +301,8 @@ async def run(args) -> tuple[dict, int]:
     # repo boundaries — precision/recall of the skill is a global property).
     sys.path.insert(0, str(Path(__file__).resolve().parent))
     from _extraction_metrics import aggregate_extraction_metrics  # type: ignore[import-not-found]
-    all_extraction_rows = [
-        t["extraction_metrics"]
-        for r in repo_reports
-        for t in r["transcripts"]
-    ]
+
+    all_extraction_rows = [t["extraction_metrics"] for r in repo_reports for t in r["transcripts"]]
     aggregate_extraction = aggregate_extraction_metrics(all_extraction_rows)
 
     combined = {
@@ -378,8 +370,7 @@ async def run(args) -> tuple[dict, int]:
         exit_code = 1
     if exit_code == 0 and args.min_grounded_pct is not None:
         print(
-            f"\n✅ PASS: grounded_pct {aggregate_pct:.3f} "
-            f"≥ threshold {args.min_grounded_pct:.3f}"
+            f"\n✅ PASS: grounded_pct {aggregate_pct:.3f} ≥ threshold {args.min_grounded_pct:.3f}"
         )
 
     return combined, exit_code
diff --git a/tests/fixtures/expected/decisions.py b/tests/fixtures/expected/decisions.py
index 4f65c5b2..d947bbd1 100644
--- a/tests/fixtures/expected/decisions.py
+++ b/tests/fixtures/expected/decisions.py
@@ -20,7 +20,13 @@
     {
         "description": "Add 12-second timeout ceiling on payment provider authorize calls; return requires_more status on timeout",
         "source_ref": "medusa-payment-timeout",
-        "keywords": ["payment timeout", "authorize call", "12 second", "requires_more", "checkout timeout"],
+        "keywords": [
+            "payment timeout",
+            "authorize call",
+            "12 second",
+            "requires_more",
+            "checkout timeout",
+        ],
         "expected_symbols": [
             "PaymentProviderService",
         ],
@@ -31,7 +37,13 @@
     {
         "description": "Background sweeper job via JobSchedulerService: void payment sessions stuck in pending state for more than 5 minutes",
         "source_ref": "medusa-payment-timeout",
-        "keywords": ["sweeper job", "pending payment session", "void", "5 minutes", "job scheduler"],
+        "keywords": [
+            "sweeper job",
+            "pending payment session",
+            "void",
+            "5 minutes",
+            "job scheduler",
+        ],
         "expected_symbols": [
             "PaymentProviderService",
         ],
@@ -54,7 +66,13 @@
     {
         "description": "Guard against garbage responses from community payment providers — throw typed error if authorize returns undefined or malformed object",
         "source_ref": "medusa-payment-timeout",
-        "keywords": ["validate provider response", "community provider", "undefined response", "typed error", "authorize response"],
+        "keywords": [
+            "validate provider response",
+            "community provider",
+            "undefined response",
+            "typed error",
+            "authorize response",
+        ],
         "expected_symbols": [
             "PaymentProviderService",
         ],
@@ -69,7 +87,13 @@
     {
         "description": "Migrate plugin service classes from TransactionBaseService to AbstractModuleService using @Module decorator",
         "source_ref": "medusa-plugin-migration",
-        "keywords": ["plugin migration", "AbstractModuleService", "@Module decorator", "TransactionBaseService", "v2 module"],
+        "keywords": [
+            "plugin migration",
+            "AbstractModuleService",
+            "@Module decorator",
+            "TransactionBaseService",
+            "v2 module",
+        ],
         "expected_symbols": [
             "AbstractModuleService",
         ],
@@ -80,7 +104,13 @@
     {
         "description": "Convert plugin subscribers to createWorkflow/createStep pattern; subscribers directory no longer auto-registers in v2",
         "source_ref": "medusa-plugin-migration",
-        "keywords": ["subscribers", "createWorkflow", "createStep", "workflow migration", "event subscriber"],
+        "keywords": [
+            "subscribers",
+            "createWorkflow",
+            "createStep",
+            "workflow migration",
+            "event subscriber",
+        ],
         "expected_symbols": [
             "createWorkflow",
             "createStep",
@@ -92,7 +122,13 @@
     {
         "description": "Service injection must go through Modules registry — no direct imports of core services from other modules",
         "source_ref": "medusa-plugin-migration",
-        "keywords": ["Modules registry", "service injection", "no direct imports", "awilix scoping", "module isolation"],
+        "keywords": [
+            "Modules registry",
+            "service injection",
+            "no direct imports",
+            "awilix scoping",
+            "module isolation",
+        ],
         "expected_symbols": [
             "Modules",
             "OrderService",
@@ -105,7 +141,13 @@
     {
         "description": "Run v1 and v2 API routes in parallel for one release cycle using middlewares.ts pattern",
         "source_ref": "medusa-plugin-migration",
-        "keywords": ["backward compat", "v1 routes", "parallel routes", "middlewares.ts", "legacy API"],
+        "keywords": [
+            "backward compat",
+            "v1 routes",
+            "parallel routes",
+            "middlewares.ts",
+            "legacy API",
+        ],
         "expected_symbols": [
             "middlewares",
         ],
@@ -120,7 +162,13 @@
     {
         "description": "Create WebhookEndpoint model with fields: URL, HMAC secret, subscribed event types, per-merchant",
         "source_ref": "medusa-webhook-notifications",
-        "keywords": ["WebhookEndpoint", "merchant webhook", "webhook model", "HMAC secret", "event subscription"],
+        "keywords": [
+            "WebhookEndpoint",
+            "merchant webhook",
+            "webhook model",
+            "HMAC secret",
+            "event subscription",
+        ],
         "expected_symbols": [
             "AbstractNotificationProviderService",
         ],
@@ -131,7 +179,13 @@
     {
         "description": "Exponential backoff retry: 30s initial delay, max 4h, 6 retries then dead-letter queue to Redis Streams",
         "source_ref": "medusa-webhook-notifications",
-        "keywords": ["exponential backoff", "retry webhook", "dead letter queue", "6 retries", "Redis DLQ"],
+        "keywords": [
+            "exponential backoff",
+            "retry webhook",
+            "dead letter queue",
+            "6 retries",
+            "Redis DLQ",
+        ],
         "expected_symbols": [],
         "expected_file_patterns": ["webhook", "retry"],
         "prd_failure_mode": "CONSTRAINT_LOST",  # Retry policy is an explicit constraint
@@ -149,7 +203,12 @@
     {
         "description": "Include idempotency key (UUID per delivery attempt) in webhook payload so merchants can deduplicate",
         "source_ref": "medusa-webhook-notifications",
-        "keywords": ["idempotency key", "webhook deduplication", "UUID delivery", "delivery attempt"],
+        "keywords": [
+            "idempotency key",
+            "webhook deduplication",
+            "UUID delivery",
+            "delivery attempt",
+        ],
         "expected_symbols": [],
         "expected_file_patterns": ["webhook"],
         "prd_failure_mode": "CONSTRAINT_LOST",
@@ -163,7 +222,13 @@
     {
         "description": "Synchronous validation hooks in checkout pipeline that can reject operations — plugin raises ValidationError that propagates through GraphQL",
         "source_ref": "saleor-checkout-extensibility",
-        "keywords": ["checkout validation", "synchronous hooks", "ValidationError", "reject operation", "pre-validation"],
+        "keywords": [
+            "checkout validation",
+            "synchronous hooks",
+            "ValidationError",
+            "reject operation",
+            "pre-validation",
+        ],
         "expected_symbols": [
             "PluginsManager",
             "CheckoutError",
@@ -175,7 +240,13 @@
     {
         "description": "Circuit breaker: 3 consecutive validation endpoint timeouts — skip that plugin for subsequent checkouts; per-app per-event-type tracking in Redis sliding window",
         "source_ref": "saleor-checkout-extensibility",
-        "keywords": ["circuit breaker", "validation timeout", "3 consecutive failures", "skip plugin", "sliding window"],
+        "keywords": [
+            "circuit breaker",
+            "validation timeout",
+            "3 consecutive failures",
+            "skip plugin",
+            "sliding window",
+        ],
         "expected_symbols": [],
         "expected_file_patterns": ["checkout", "plugin", "circuit"],
         "prd_failure_mode": "CONSTRAINT_LOST",
@@ -185,7 +256,13 @@
     {
         "description": "Cache checkout validation results in Redis keyed by last_change timestamp with TTL; invalidate on line changes, address updates, or shipping method changes",
         "source_ref": "saleor-checkout-extensibility",
-        "keywords": ["cache validation", "last_change", "Redis TTL", "checkout cache", "validation cache"],
+        "keywords": [
+            "cache validation",
+            "last_change",
+            "Redis TTL",
+            "checkout cache",
+            "validation cache",
+        ],
         "expected_symbols": [
             "Checkout",
         ],
@@ -196,7 +273,12 @@
     {
         "description": "Plugins receive serialized checkout data, not raw querysets — security boundary to prevent third-party data access",
         "source_ref": "saleor-checkout-extensibility",
-        "keywords": ["plugin data access", "serialized data", "security boundary", "not raw queryset"],
+        "keywords": [
+            "plugin data access",
+            "serialized data",
+            "security boundary",
+            "not raw queryset",
+        ],
         "expected_symbols": [
             "PluginsManager",
         ],
@@ -212,7 +294,13 @@
     {
         "description": "Channel-scoped JWT permissions: permission claim becomes dict mapping codename to list of channel slugs or ['*'] for global; existing flat format treated as all-channels for backward compat",
         "source_ref": "saleor-graphql-permissions",
-        "keywords": ["channel permissions", "JWT scoped", "channel slug", "permission_required", "backward compat"],
+        "keywords": [
+            "channel permissions",
+            "JWT scoped",
+            "channel slug",
+            "permission_required",
+            "backward compat",
+        ],
         "expected_symbols": [
             "check_permissions",
             "effective_permissions",
@@ -224,7 +312,11 @@
     {
         "description": "Gate checkoutComplete mutation on channel permission before any side effects — order creation, payment processing, webhooks",
         "source_ref": "saleor-graphql-permissions",
-        "keywords": ["checkoutComplete permission", "gate before side effects", "early permission check"],
+        "keywords": [
+            "checkoutComplete permission",
+            "gate before side effects",
+            "early permission check",
+        ],
         "expected_symbols": [
             "checkoutComplete",
             "check_permissions",
@@ -237,7 +329,12 @@
     {
         "description": "App model: add channel_access relationship so third-party apps only access channels they are installed for",
         "source_ref": "saleor-graphql-permissions",
-        "keywords": ["app channel access", "channel_access", "third-party app permission", "app installed channels"],
+        "keywords": [
+            "app channel access",
+            "channel_access",
+            "third-party app permission",
+            "app installed channels",
+        ],
         "expected_symbols": [
             "App",
         ],
@@ -252,7 +349,13 @@
     {
         "description": "Wrap decrease_stock and allocation cleanup in transaction.atomic — currently separate operations causing orphaned allocation records when decrease_stock succeeds but cleanup fails",
         "source_ref": "saleor-order-workflows",
-        "keywords": ["transaction.atomic", "decrease_stock", "allocation cleanup", "orphaned allocation", "stock transaction"],
+        "keywords": [
+            "transaction.atomic",
+            "decrease_stock",
+            "allocation cleanup",
+            "orphaned allocation",
+            "stock transaction",
+        ],
         "expected_symbols": [
             "decrease_stock",
             "orderFulfill",
@@ -264,7 +367,13 @@
     {
         "description": "Defer FULFILLMENT_CREATED webhook dispatch to Django on_commit hook — currently fires before stock operations complete causing stale data in downstream systems",
         "source_ref": "saleor-order-workflows",
-        "keywords": ["on_commit", "webhook timing", "FULFILLMENT_CREATED", "defer webhook", "after transaction"],
+        "keywords": [
+            "on_commit",
+            "webhook timing",
+            "FULFILLMENT_CREATED",
+            "defer webhook",
+            "after transaction",
+        ],
         "expected_symbols": [
             "fulfillment_created",
             "FULFILLMENT_CREATED",
@@ -277,7 +386,12 @@
     {
         "description": "Fix update_order_status: missing RETURNED status handling causes orders to stay FULFILLED even after all fulfillments are returned",
         "source_ref": "saleor-order-workflows",
-        "keywords": ["update_order_status", "RETURNED status", "fulfillment status sync", "order status bug"],
+        "keywords": [
+            "update_order_status",
+            "RETURNED status",
+            "fulfillment status sync",
+            "order status bug",
+        ],
         "expected_symbols": [
             "update_order_status",
         ],
@@ -288,7 +402,12 @@
     {
         "description": "Database constraint on Stock: quantity cannot go negative; decrease_stock can produce negative values in race condition",
         "source_ref": "saleor-order-workflows",
-        "keywords": ["stock constraint", "negative quantity", "race condition", "database constraint"],
+        "keywords": [
+            "stock constraint",
+            "negative quantity",
+            "race condition",
+            "database constraint",
+        ],
         "expected_symbols": [
             "Stock",
             "decrease_stock",
@@ -304,7 +423,13 @@
     {
         "description": "Custom ProductVariantPriceUpdateStrategy: strip tax in source channel, convert currency using TaxRateService, reapply destination zone rate; iterate per currency per channel not per channel",
         "source_ref": "vendure-channel-pricing",
-        "keywords": ["ProductVariantPriceUpdateStrategy", "currency conversion", "tax stripping", "multi-channel pricing", "InjectableStrategy"],
+        "keywords": [
+            "ProductVariantPriceUpdateStrategy",
+            "currency conversion",
+            "tax stripping",
+            "multi-channel pricing",
+            "InjectableStrategy",
+        ],
         "expected_symbols": [
             "ProductVariantPriceUpdateStrategy",
             "TaxRateService",
@@ -345,7 +470,12 @@
     {
         "description": "struct type custom field warning: stores as simple-json, no SQL-level querying or indexing on sub-fields — do not use struct if you need to filter on nested values",
         "source_ref": "vendure-custom-fields",
-        "keywords": ["struct custom field", "simple-json", "no SQL indexing", "nested field warning"],
+        "keywords": [
+            "struct custom field",
+            "simple-json",
+            "no SQL indexing",
+            "nested field warning",
+        ],
         "expected_symbols": [],
         "expected_file_patterns": ["custom", "shared-types"],
         "prd_failure_mode": "TRIBAL_KNOWLEDGE",
@@ -359,7 +489,13 @@
     {
         "description": "Enable bufferUpdates on DefaultSearchPlugin to deduplicate by entity ID during bulk imports; switch from SqlJobQueueStrategy to BullMQJobQueuePlugin",
         "source_ref": "vendure-search-reindexing",
-        "keywords": ["bufferUpdates", "BullMQJobQueuePlugin", "search reindex", "SqlJobQueueStrategy", "bulk import"],
+        "keywords": [
+            "bufferUpdates",
+            "BullMQJobQueuePlugin",
+            "search reindex",
+            "SqlJobQueueStrategy",
+            "bulk import",
+        ],
         "expected_symbols": [
             "DefaultSearchPlugin",
             "BullMQJobQueuePlugin",
@@ -372,7 +508,12 @@
     {
         "description": "Split workers using activeQueues option: dedicated search worker plus general worker so reindex does not block order confirmation emails",
         "source_ref": "vendure-search-reindexing",
-        "keywords": ["activeQueues", "split workers", "dedicated search worker", "worker isolation"],
+        "keywords": [
+            "activeQueues",
+            "split workers",
+            "dedicated search worker",
+            "worker isolation",
+        ],
         "expected_symbols": [],
         "expected_file_patterns": ["search", "worker", "config"],
         "prd_failure_mode": "CONSTRAINT_LOST",
@@ -381,7 +522,12 @@
     {
         "description": "Performance targets: reindex p95 search latency under 200ms (was 800ms during reindex), database CPU under 50% during full reindex",
         "source_ref": "vendure-search-reindexing",
-        "keywords": ["search latency 200ms", "database CPU reindex", "p95 latency", "reindex performance"],
+        "keywords": [
+            "search latency 200ms",
+            "database CPU reindex",
+            "p95 latency",
+            "reindex performance",
+        ],
         "expected_symbols": ["DefaultSearchPlugin"],
         "expected_file_patterns": ["search-plugin", "search-strategy", "reindex"],
         "prd_failure_mode": "CONSTRAINT_LOST",
@@ -437,7 +583,13 @@
     {
         "description": "Drift detection flow: detect changed files in a commit, look up intents grounded to those files, recompute status via hash comparison, update intent status",
         "source_ref": "bicameral-mcp-multi-region",
-        "keywords": ["drift detection", "link_commit", "derive_status", "hash comparison", "detect_drift"],
+        "keywords": [
+            "drift detection",
+            "link_commit",
+            "derive_status",
+            "hash comparison",
+            "detect_drift",
+        ],
         "expected_symbols": [
             "handle_link_commit",
             "handle_detect_drift",
@@ -457,7 +609,13 @@
     {
         "description": "Team collaboration mode: dual-write adapter intercepts mutations, emits event files, materializes peer events on startup for multi-user ledger sync",
         "source_ref": "bicameral-mcp-multi-region",
-        "keywords": ["team mode", "dual-write", "event sourcing", "TeamWriteAdapter", "materializer"],
+        "keywords": [
+            "team mode",
+            "dual-write",
+            "event sourcing",
+            "TeamWriteAdapter",
+            "materializer",
+        ],
         "expected_symbols": [
             "TeamWriteAdapter",
             "EventFileWriter",
diff --git a/tests/fixtures/flow2_prompt.json b/tests/fixtures/flow2_prompt.json
new file mode 100644
index 00000000..b29abc4f
--- /dev/null
+++ b/tests/fixtures/flow2_prompt.json
@@ -0,0 +1,3 @@
+{
+  "prompt": "I know the roadmap said drag-and-drop to reorder commits, but actually we're switching to a text-editor approach. Please update cherry-pick.ts and reorder.ts."
+}
diff --git a/tests/generate_e2e_report.py b/tests/generate_e2e_report.py
index 2ec43a96..9771246b 100644
--- a/tests/generate_e2e_report.py
+++ b/tests/generate_e2e_report.py
@@ -15,7 +15,7 @@
 
 import json
 import sys
-from datetime import datetime, timezone
+from datetime import UTC, datetime, timezone
 from pathlib import Path
 
 E2E_DIR = Path(__file__).parent.parent / "test-results" / "e2e"
@@ -112,11 +112,12 @@ def _render_json(data: dict, max_lines: int = 40) -> str:
         text += f"\n... ({len(raw.split(chr(10))) - max_lines} more lines)"
     # Basic syntax coloring
     import re
+
     text = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
     text = re.sub(r'"([^"]*)"(?=\s*:)', r'<span style="color:#a88af0">"\1"</span>', text)
     text = re.sub(r':\s*"([^"]*)"', r': <span style="color:#6af0a0">"\1"</span>', text)
-    text = re.sub(r':\s*(\d+\.?\d*)', r': <span style="color:#4af0c4">\1</span>', text)
-    text = re.sub(r':\s*(true|false|null)', r': <span style="color:#f0b94a">\1</span>', text)
+    text = re.sub(r":\s*(\d+\.?\d*)", r': <span style="color:#4af0c4">\1</span>', text)
+    text = re.sub(r":\s*(true|false|null)", r': <span style="color:#f0b94a">\1</span>', text)
     return text
 
 
@@ -141,19 +142,23 @@ def _render_graph_section(graph: dict) -> str:
         nid = str(intent.get("id", ""))
         desc = str(intent.get("description", ""))[:50]
         status = intent.get("cached_status", "—")
-        cy_elements.append({
-            "data": {"id": nid, "label": desc, "status": status, "type": "intent"},
-            "classes": "intent",
-        })
+        cy_elements.append(
+            {
+                "data": {"id": nid, "label": desc, "status": status, "type": "intent"},
+                "classes": "intent",
+            }
+        )
         node_id_set.add(nid)
 
     for symbol in nodes.get("symbols", []):
         nid = str(symbol.get("id", ""))
         name = str(symbol.get("name", nid))
-        cy_elements.append({
-            "data": {"id": nid, "label": name, "type": "symbol"},
-            "classes": "symbol",
-        })
+        cy_elements.append(
+            {
+                "data": {"id": nid, "label": name, "type": "symbol"},
+                "classes": "symbol",
+            }
+        )
         node_id_set.add(nid)
 
     for region in nodes.get("code_regions", []):
@@ -161,10 +166,12 @@ def _render_graph_section(graph: dict) -> str:
         fp = str(region.get("file_path", "?"))
         sym = str(region.get("symbol", ""))
         label = f"{sym}\n{fp.split('/')[-1]}" if sym else fp.split("/")[-1]
-        cy_elements.append({
-            "data": {"id": nid, "label": label, "file": fp, "type": "code_region"},
-            "classes": "code_region",
-        })
+        cy_elements.append(
+            {
+                "data": {"id": nid, "label": label, "file": fp, "type": "code_region"},
+                "classes": "code_region",
+            }
+        )
         node_id_set.add(nid)
 
     for edge_type, edge_list in edges.items():
@@ -174,14 +181,16 @@ def _render_graph_section(graph: dict) -> str:
             src = str(edge.get("out", ""))
             tgt = str(edge.get("in", ""))
             if src in node_id_set and tgt in node_id_set:
-                cy_elements.append({
-                    "data": {
-                        "id": f"e_{edge_type}_{i}_{_graph_counter}",
-                        "source": src,
-                        "target": tgt,
-                        "label": edge_type,
-                    },
-                })
+                cy_elements.append(
+                    {
+                        "data": {
+                            "id": f"e_{edge_type}_{i}_{_graph_counter}",
+                            "source": src,
+                            "target": tgt,
+                            "label": edge_type,
+                        },
+                    }
+                )
 
     elements_json = json.dumps(cy_elements, default=str)
 
@@ -196,25 +205,30 @@ def _render_graph_section(graph: dict) -> str:
     for intent in nodes.get("intents", []):
         desc = str(intent.get("description", ""))[:80]
         status = intent.get("cached_status", "—")
-        color = {"reflected": "#6af0a0", "drifted": "#f06a6a", "pending": "#f0b94a", "ungrounded": "#4ab8f0"}.get(status, "#6b7699")
+        color = {
+            "reflected": "#6af0a0",
+            "drifted": "#f06a6a",
+            "pending": "#f0b94a",
+            "ungrounded": "#4ab8f0",
+        }.get(status, "#6b7699")
         intent_rows += f'<tr><td class="mono">{str(intent.get("id", "?"))[-12:]}</td><td>{desc}</td><td style="color:{color};font-weight:600">{status}</td></tr>\n'
 
     region_rows = ""
     for region in nodes.get("code_regions", []):
         fp = str(region.get("file_path", "?"))
         sym = str(region.get("symbol", "?"))
-        lines = f'{region.get("start_line", "?")}-{region.get("end_line", "?")}'
+        lines = f"{region.get('start_line', '?')}-{region.get('end_line', '?')}"
         region_rows += f'<tr><td class="mono">{fp}</td><td>{sym}</td><td>{lines}</td></tr>\n'
 
     tables_html = ""
     if intent_rows:
-        tables_html += f'''<h4 style="color:#a88af0;margin:12px 0 6px">Intents</h4>
+        tables_html += f"""<h4 style="color:#a88af0;margin:12px 0 6px">Intents</h4>
 <table class="data-table"><tr><th>ID</th><th>Description</th><th>Status</th></tr>
-{intent_rows}</table>'''
+{intent_rows}</table>"""
     if region_rows:
-        tables_html += f'''<h4 style="color:#4af0c4;margin:12px 0 6px">Code Regions</h4>
+        tables_html += f"""<h4 style="color:#4af0c4;margin:12px 0 6px">Code Regions</h4>
 <table class="data-table"><tr><th>File</th><th>Symbol</th><th>Lines</th></tr>
-{region_rows}</table>'''
+{region_rows}</table>"""
 
     return f'''
 <div class="graph-summary">{summary}</div>
@@ -317,7 +331,7 @@ def _render_graph_section(graph: dict) -> str:
 def generate() -> str:
     global _graph_counter
     _graph_counter = 0
-    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
+    now = datetime.now(UTC).strftime("%Y-%m-%d %H:%M UTC")
 
     sections_html = ""
     total_artifacts = 0
@@ -330,37 +344,37 @@ def generate() -> str:
         response_panels = ""
         for resp in responses:
             rendered = _render_json(resp["data"])
-            response_panels += f'''
+            response_panels += f"""
 <details class="artifact-panel">
   <summary>{resp["name"].replace("_", " ").title()}</summary>
   <pre class="json-output">{rendered}</pre>
-</details>'''
+</details>"""
 
         # Graph panels
         graph_panels = ""
         for graph in graphs:
             graph_html = _render_graph_section(graph["data"])
             c = graph["data"].get("counts", {})
-            graph_panels += f'''
+            graph_panels += f"""
 <div class="artifact-panel graph-panel" style="padding:14px;">
   <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">
     <span style="color:var(--accent);font-weight:600;font-size:13px;">Knowledge Graph — {c.get("intents", 0)} intents, {c.get("symbols", 0)} symbols, {c.get("code_regions", 0)} regions</span>
     <div class="cy-legend"><span class="lg-intent">intent</span><span class="lg-symbol">symbol</span><span class="lg-region">code_region</span></div>
   </div>
   {graph_html}
-</div>'''
+</div>"""
 
         has_content = responses or graphs
-        sections_html += f'''
+        sections_html += f"""
 <div class="sdlc-section" style="border-left-color:{section["color"]}">
   <div class="sdlc-badge" style="color:{section["color"]}">{section["sdlc"]}</div>
   <h3>{section["title"]}</h3>
   <p class="sdlc-desc">{section["description"]}</p>
   <div class="tools-used">Tools: <span class="mono">{section["tools"]}</span></div>
   {"<div class='artifacts'>" + response_panels + graph_panels + "</div>" if has_content else '<p class="no-artifacts">No artifacts generated — test may not have run.</p>'}
-</div>'''
+</div>"""
 
-    return f'''<!DOCTYPE html>
+    return f"""<!DOCTYPE html>
 <html lang="en">
 <head>
 <meta charset="UTF-8">
@@ -445,7 +459,7 @@ def generate() -> str:
 
 </div>
 </body>
-</html>'''
+</html>"""
 
 
 def main():
diff --git a/tests/regen_extraction_fixtures.py b/tests/regen_extraction_fixtures.py
index eccebbca..60585f49 100644
--- a/tests/regen_extraction_fixtures.py
+++ b/tests/regen_extraction_fixtures.py
@@ -34,18 +34,18 @@
 After running, `git diff tests/fixtures/extraction/` should
 show the new/changed fixtures. Review, hand-edit if needed, commit.
 """
+
 from __future__ import annotations
 
 import argparse
 import json
 import sys
-from datetime import datetime, timezone
+from datetime import UTC, datetime, timezone
 from pathlib import Path
 
 sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
 sys.path.insert(0, str(Path(__file__).resolve().parent))
 
-from fixtures.expected.decisions import TRANSCRIPT_SOURCES  # noqa: E402
 from _extract_headless import (  # noqa: E402  (sibling module)
     DEFAULT_MODEL,
     SKILL_MD_PATH,
@@ -53,6 +53,7 @@
     _sha,
     extract_from_current_skill,
 )
+from fixtures.expected.decisions import TRANSCRIPT_SOURCES  # noqa: E402
 
 MCP_ROOT = Path(__file__).resolve().parents[1]
 FIXTURES_DIR = Path(__file__).resolve().parent / "fixtures" / "extraction"
@@ -104,7 +105,7 @@ def _regenerate_one(
         "transcript_path": src["transcript"],
         "repo_key": src["repo_key"],
         "generated_by": model,
-        "generated_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
+        "generated_at": datetime.now(UTC).isoformat(timespec="seconds"),
         "skill_md_sha": _sha(skill_md)[:12],
         "decisions": extracted.get("decisions", []),
         "action_items": extracted.get("action_items", []),
diff --git a/tests/test_alpha_contract.py b/tests/test_alpha_contract.py
index 62873847..481064d7 100644
--- a/tests/test_alpha_contract.py
+++ b/tests/test_alpha_contract.py
@@ -26,6 +26,7 @@
 real commits — labeled under one suite so the v0.7.0 refactor can be
 gated on it.
 """
+
 from __future__ import annotations
 
 import subprocess
@@ -44,7 +45,6 @@
 from handlers.search_decisions import handle_search_decisions
 from handlers.sync_middleware import ensure_ledger_synced, get_session_start_banner
 
-
 # ── Git + ingest helpers ─────────────────────────────────────────────
 
 
@@ -104,14 +104,16 @@ def _ingest_payload(description: str, *, with_region: bool, signoff: bool) -> di
         "code_regions": [],
     }
     if with_region:
-        mapping["code_regions"] = [{
-            "file_path": "impl.py",
-            "symbol": "fetch_user",
-            "type": "function",
-            "start_line": 1,
-            "end_line": 3,
-            "purpose": description,
-        }]
+        mapping["code_regions"] = [
+            {
+                "file_path": "impl.py",
+                "symbol": "fetch_user",
+                "type": "function",
+                "start_line": 1,
+                "end_line": 3,
+                "purpose": description,
+            }
+        ]
     if signoff:
         mapping["signoff"] = {
             "state": "ratified",
@@ -210,21 +212,28 @@ async def test_ingest_bind_commit_marks_reflected(alpha_env):
     # Decision is searchable by description tokens (invariant 1 — "searchable
     # by feature area"). Uses BM25 via handle_search_decisions.
     search_resp = await handle_search_decisions(
-        ctx, query="JWT session authentication", max_results=5,
+        ctx,
+        query="JWT session authentication",
+        max_results=5,
     )
     assert any(m.decision_id == decision_id for m in search_resp.matches), (
         "ingested decision must be retrievable via BM25 search"
     )
 
     # 2. Caller-LLM bind (invariant 2, author-attested via provenance=caller_llm).
-    bind_resp = await handle_bind(ctx, bindings=[{
-        "decision_id": decision_id,
-        "file_path": "impl.py",
-        "symbol_name": "fetch_user",
-        "start_line": 1,
-        "end_line": 3,
-        "purpose": "JWT validation entrypoint",
-    }])
+    bind_resp = await handle_bind(
+        ctx,
+        bindings=[
+            {
+                "decision_id": decision_id,
+                "file_path": "impl.py",
+                "symbol_name": "fetch_user",
+                "start_line": 1,
+                "end_line": 3,
+                "purpose": "JWT validation entrypoint",
+            }
+        ],
+    )
     assert len(bind_resp.bindings) == 1
     b = bind_resp.bindings[0]
     assert b.error is None, f"bind failed: {b.error}"
@@ -235,14 +244,16 @@ async def test_ingest_bind_commit_marks_reflected(alpha_env):
     rc_resp = await handle_resolve_compliance(
         ctx,
         phase="ingest",
-        verdicts=[{
-            "decision_id": decision_id,
-            "region_id": b.region_id,
-            "content_hash": b.content_hash,
-            "verdict": "compliant",
-            "confidence": "high",
-            "explanation": "fetch_user performs JWT lookup as decided.",
-        }],
+        verdicts=[
+            {
+                "decision_id": decision_id,
+                "region_id": b.region_id,
+                "content_hash": b.content_hash,
+                "verdict": "compliant",
+                "confidence": "high",
+                "explanation": "fetch_user performs JWT lookup as decided.",
+            }
+        ],
     )
     assert len(rc_resp.accepted) == 1
     assert not rc_resp.rejected
@@ -274,26 +285,34 @@ async def test_code_edit_without_rebind_marks_drifted(alpha_env):
     )
     decision_id = ingest_resp.pending_grounding_decisions[0]["decision_id"]
 
-    bind_resp = await handle_bind(ctx, bindings=[{
-        "decision_id": decision_id,
-        "file_path": "impl.py",
-        "symbol_name": "fetch_user",
-        "start_line": 1,
-        "end_line": 3,
-    }])
+    bind_resp = await handle_bind(
+        ctx,
+        bindings=[
+            {
+                "decision_id": decision_id,
+                "file_path": "impl.py",
+                "symbol_name": "fetch_user",
+                "start_line": 1,
+                "end_line": 3,
+            }
+        ],
+    )
     b = bind_resp.bindings[0]
     assert b.error is None
 
     await handle_resolve_compliance(
-        ctx, phase="ingest",
-        verdicts=[{
-            "decision_id": decision_id,
-            "region_id": b.region_id,
-            "content_hash": b.content_hash,
-            "verdict": "compliant",
-            "confidence": "high",
-            "explanation": "baseline verified",
-        }],
+        ctx,
+        phase="ingest",
+        verdicts=[
+            {
+                "decision_id": decision_id,
+                "region_id": b.region_id,
+                "content_hash": b.content_hash,
+                "verdict": "compliant",
+                "confidence": "high",
+                "explanation": "baseline verified",
+            }
+        ],
     )
     assert await _decision_status(ctx, decision_id) == "reflected"
 
@@ -392,13 +411,18 @@ async def test_preflight_surfaces_bound_decisions(monkeypatch, alpha_env):
     )
     decision_id = ingest_resp.pending_grounding_decisions[0]["decision_id"]
 
-    bind_resp = await handle_bind(ctx, bindings=[{
-        "decision_id": decision_id,
-        "file_path": "impl.py",
-        "symbol_name": "fetch_user",
-        "start_line": 1,
-        "end_line": 3,
-    }])
+    bind_resp = await handle_bind(
+        ctx,
+        bindings=[
+            {
+                "decision_id": decision_id,
+                "file_path": "impl.py",
+                "symbol_name": "fetch_user",
+                "start_line": 1,
+                "end_line": 3,
+            }
+        ],
+    )
     assert bind_resp.bindings[0].error is None
 
     pf_resp = await handle_preflight(
@@ -410,8 +434,7 @@ async def test_preflight_surfaces_bound_decisions(monkeypatch, alpha_env):
     assert "region" in pf_resp.sources_chained
     decision_ids = [d.decision_id for d in pf_resp.decisions]
     assert decision_id in decision_ids, (
-        f"bound decision {decision_id} missing from preflight response "
-        f"(got: {decision_ids})"
+        f"bound decision {decision_id} missing from preflight response (got: {decision_ids})"
     )
 
 
@@ -441,26 +464,34 @@ async def test_hook_no_fire_still_syncs(alpha_env):
     )
     decision_id = ingest_resp.pending_grounding_decisions[0]["decision_id"]
 
-    bind_resp = await handle_bind(ctx, bindings=[{
-        "decision_id": decision_id,
-        "file_path": "impl.py",
-        "symbol_name": "fetch_user",
-        "start_line": 1,
-        "end_line": 3,
-    }])
+    bind_resp = await handle_bind(
+        ctx,
+        bindings=[
+            {
+                "decision_id": decision_id,
+                "file_path": "impl.py",
+                "symbol_name": "fetch_user",
+                "start_line": 1,
+                "end_line": 3,
+            }
+        ],
+    )
     b = bind_resp.bindings[0]
     assert b.error is None
 
     await handle_resolve_compliance(
-        ctx, phase="ingest",
-        verdicts=[{
-            "decision_id": decision_id,
-            "region_id": b.region_id,
-            "content_hash": b.content_hash,
-            "verdict": "compliant",
-            "confidence": "high",
-            "explanation": "baseline",
-        }],
+        ctx,
+        phase="ingest",
+        verdicts=[
+            {
+                "decision_id": decision_id,
+                "region_id": b.region_id,
+                "content_hash": b.content_hash,
+                "verdict": "compliant",
+                "confidence": "high",
+                "explanation": "baseline",
+            }
+        ],
     )
     assert await _decision_status(ctx, decision_id) == "reflected"
 
diff --git a/tests/test_alpha_flow.py b/tests/test_alpha_flow.py
index aeace213..d8c15230 100644
--- a/tests/test_alpha_flow.py
+++ b/tests/test_alpha_flow.py
@@ -18,6 +18,7 @@
 Plus one v0.7-specific invariant:
 6. Proposal state — new ingests enter as 'proposal'; drift-exempt until ratified.
 """
+
 from __future__ import annotations
 
 import os
@@ -38,7 +39,6 @@
 from handlers.sync_middleware import ensure_ledger_synced, get_session_start_banner
 from ledger.queries import project_decision_status
 
-
 # ── Shared helpers ───────────────────────────────────────────────────
 
 
@@ -96,14 +96,16 @@ def _ratified_payload(description: str, *, with_region: bool = False) -> dict:
         },
     }
     if with_region:
-        mapping["code_regions"] = [{
-            "file_path": "impl.py",
-            "symbol": "fetch_user",
-            "type": "function",
-            "start_line": 1,
-            "end_line": 3,
-            "purpose": description,
-        }]
+        mapping["code_regions"] = [
+            {
+                "file_path": "impl.py",
+                "symbol": "fetch_user",
+                "type": "function",
+                "start_line": 1,
+                "end_line": 3,
+                "purpose": description,
+            }
+        ]
     return {"query": description, "repo": "jacob-repo", "mappings": [mapping]}
 
 
@@ -139,9 +141,13 @@ async def test_ingest_bind_commit_marks_reflected(alpha_env):
     ctx, _ = alpha_env
 
     # Invariant 1: ingest lands in ledger, searchable.
-    ingest_resp = await handle_ingest(ctx, _ratified_payload(
-        "JWT is the session-auth primitive, not cookies.", with_region=False,
-    ))
+    ingest_resp = await handle_ingest(
+        ctx,
+        _ratified_payload(
+            "JWT is the session-auth primitive, not cookies.",
+            with_region=False,
+        ),
+    )
     assert ingest_resp.ingested
     assert len(ingest_resp.pending_grounding_decisions) == 1
     decision_id = ingest_resp.pending_grounding_decisions[0]["decision_id"]
@@ -152,26 +158,37 @@ async def test_ingest_bind_commit_marks_reflected(alpha_env):
     )
 
     # Invariant 2: bind is author-attested.
-    bind_resp = await handle_bind(ctx, bindings=[{
-        "decision_id": decision_id,
-        "file_path": "impl.py",
-        "symbol_name": "fetch_user",
-        "start_line": 1,
-        "end_line": 3,
-    }])
+    bind_resp = await handle_bind(
+        ctx,
+        bindings=[
+            {
+                "decision_id": decision_id,
+                "file_path": "impl.py",
+                "symbol_name": "fetch_user",
+                "start_line": 1,
+                "end_line": 3,
+            }
+        ],
+    )
     b = bind_resp.bindings[0]
     assert b.error is None, f"Invariant 2 FAIL: bind error: {b.error}"
     assert b.region_id and b.content_hash
 
     # Invariant 3: compliant verdict + ratified signoff → reflected.
-    rc = await handle_resolve_compliance(ctx, phase="ingest", verdicts=[{
-        "decision_id": decision_id,
-        "region_id": b.region_id,
-        "content_hash": b.content_hash,
-        "verdict": "compliant",
-        "confidence": "high",
-        "explanation": "fetch_user performs JWT lookup as decided.",
-    }])
+    rc = await handle_resolve_compliance(
+        ctx,
+        phase="ingest",
+        verdicts=[
+            {
+                "decision_id": decision_id,
+                "region_id": b.region_id,
+                "content_hash": b.content_hash,
+                "verdict": "compliant",
+                "confidence": "high",
+                "explanation": "fetch_user performs JWT lookup as decided.",
+            }
+        ],
+    )
     assert len(rc.accepted) == 1
     status = await _decision_status(ctx, decision_id)
     assert status == "reflected", f"Invariant 3 FAIL: expected reflected, got {status}"
@@ -187,36 +204,55 @@ async def test_code_edit_without_rebind_marks_drifted(alpha_env):
     """Invariant 3 drift arm — file edit after bind, no rebind → drifted."""
     ctx, repo_root = alpha_env
 
-    ingest_resp = await handle_ingest(ctx, _ratified_payload(
-        "Fetch user returns JWT-validated identity.", with_region=False,
-    ))
+    ingest_resp = await handle_ingest(
+        ctx,
+        _ratified_payload(
+            "Fetch user returns JWT-validated identity.",
+            with_region=False,
+        ),
+    )
     decision_id = ingest_resp.pending_grounding_decisions[0]["decision_id"]
 
-    bind_resp = await handle_bind(ctx, bindings=[{
-        "decision_id": decision_id,
-        "file_path": "impl.py",
-        "symbol_name": "fetch_user",
-        "start_line": 1,
-        "end_line": 3,
-    }])
+    bind_resp = await handle_bind(
+        ctx,
+        bindings=[
+            {
+                "decision_id": decision_id,
+                "file_path": "impl.py",
+                "symbol_name": "fetch_user",
+                "start_line": 1,
+                "end_line": 3,
+            }
+        ],
+    )
     b = bind_resp.bindings[0]
     assert b.error is None
 
-    await handle_resolve_compliance(ctx, phase="ingest", verdicts=[{
-        "decision_id": decision_id,
-        "region_id": b.region_id,
-        "content_hash": b.content_hash,
-        "verdict": "compliant",
-        "confidence": "high",
-        "explanation": "baseline verified",
-    }])
+    await handle_resolve_compliance(
+        ctx,
+        phase="ingest",
+        verdicts=[
+            {
+                "decision_id": decision_id,
+                "region_id": b.region_id,
+                "content_hash": b.content_hash,
+                "verdict": "compliant",
+                "confidence": "high",
+                "explanation": "baseline verified",
+            }
+        ],
+    )
     assert await _decision_status(ctx, decision_id) == "reflected"
 
-    _commit_edit(repo_root, """
+    _commit_edit(
+        repo_root,
+        """
         def fetch_user(user_id: int):
             # Cookie-based (violates JWT decision).
             return {"id": user_id, "session_cookie": "opaque"}
-        """, msg="drift-impl")
+        """,
+        msg="drift-impl",
+    )
 
     invalidate_sync_cache(ctx)
     lc = await handle_link_commit(ctx, "HEAD")
@@ -236,22 +272,29 @@ async def test_session_start_banner_surfaces_drifts(alpha_env):
     """Invariant 4 — cold MCP session with drifted decision → banner fires."""
     ctx, _ = alpha_env
 
-    ingest_resp = await handle_ingest(ctx, _ratified_payload(
-        "Billing webhook uses exponential backoff with jitter.", with_region=True,
-    ))
+    ingest_resp = await handle_ingest(
+        ctx,
+        _ratified_payload(
+            "Billing webhook uses exponential backoff with jitter.",
+            with_region=True,
+        ),
+    )
     assert ingest_resp.ingested
     decision_id = (
         ingest_resp.pending_grounding_decisions[0]["decision_id"]
         if ingest_resp.pending_grounding_decisions
-        else (ingest_resp.sync_status.pending_compliance_checks[0].decision_id
-              if (ingest_resp.sync_status and ingest_resp.sync_status.pending_compliance_checks)
-              else None)
+        else (
+            ingest_resp.sync_status.pending_compliance_checks[0].decision_id
+            if (ingest_resp.sync_status and ingest_resp.sync_status.pending_compliance_checks)
+            else None
+        )
     )
     assert decision_id, "Could not extract decision_id from ingest"
 
     # Force drift by writing a drifted verdict directly.
     inner = getattr(ctx.ledger, "_inner", ctx.ledger)
     from ledger.queries import update_decision_status
+
     await update_decision_status(inner._client, decision_id, "drifted")
 
     # Fresh session — clear banner cache.
@@ -283,22 +326,32 @@ async def test_preflight_surfaces_bound_decisions(monkeypatch, alpha_env):
     ctx = BicameralContext.from_env()
     assert ctx.guided_mode is True
 
-    ingest_resp = await handle_ingest(ctx, _ratified_payload(
-        "User fetch enforces per-tenant rate limits in middleware.", with_region=False,
-    ))
+    ingest_resp = await handle_ingest(
+        ctx,
+        _ratified_payload(
+            "User fetch enforces per-tenant rate limits in middleware.",
+            with_region=False,
+        ),
+    )
     decision_id = ingest_resp.pending_grounding_decisions[0]["decision_id"]
 
-    bind_resp = await handle_bind(ctx, bindings=[{
-        "decision_id": decision_id,
-        "file_path": "impl.py",
-        "symbol_name": "fetch_user",
-        "start_line": 1,
-        "end_line": 3,
-    }])
+    bind_resp = await handle_bind(
+        ctx,
+        bindings=[
+            {
+                "decision_id": decision_id,
+                "file_path": "impl.py",
+                "symbol_name": "fetch_user",
+                "start_line": 1,
+                "end_line": 3,
+            }
+        ],
+    )
     assert bind_resp.bindings[0].error is None
 
-    pf = await handle_preflight(ctx, topic="user fetch rate limit middleware",
-                                file_paths=["impl.py"])
+    pf = await handle_preflight(
+        ctx, topic="user fetch rate limit middleware", file_paths=["impl.py"]
+    )
     assert pf.fired, f"Invariant 5 FAIL: preflight did not fire; reason={pf.reason}"
     decision_ids_returned = [d.decision_id for d in pf.decisions]
     assert decision_id in decision_ids_returned, (
@@ -319,37 +372,56 @@ async def test_hook_no_fire_still_syncs(alpha_env):
     """
     ctx, repo_root = alpha_env
 
-    ingest_resp = await handle_ingest(ctx, _ratified_payload(
-        "Audit log retention 30 days, enforced at write path.", with_region=False,
-    ))
+    ingest_resp = await handle_ingest(
+        ctx,
+        _ratified_payload(
+            "Audit log retention 30 days, enforced at write path.",
+            with_region=False,
+        ),
+    )
     decision_id = ingest_resp.pending_grounding_decisions[0]["decision_id"]
 
-    bind_resp = await handle_bind(ctx, bindings=[{
-        "decision_id": decision_id,
-        "file_path": "impl.py",
-        "symbol_name": "fetch_user",
-        "start_line": 1,
-        "end_line": 3,
-    }])
+    bind_resp = await handle_bind(
+        ctx,
+        bindings=[
+            {
+                "decision_id": decision_id,
+                "file_path": "impl.py",
+                "symbol_name": "fetch_user",
+                "start_line": 1,
+                "end_line": 3,
+            }
+        ],
+    )
     b = bind_resp.bindings[0]
     assert b.error is None
 
-    await handle_resolve_compliance(ctx, phase="ingest", verdicts=[{
-        "decision_id": decision_id,
-        "region_id": b.region_id,
-        "content_hash": b.content_hash,
-        "verdict": "compliant",
-        "confidence": "high",
-        "explanation": "baseline",
-    }])
+    await handle_resolve_compliance(
+        ctx,
+        phase="ingest",
+        verdicts=[
+            {
+                "decision_id": decision_id,
+                "region_id": b.region_id,
+                "content_hash": b.content_hash,
+                "verdict": "compliant",
+                "confidence": "high",
+                "explanation": "baseline",
+            }
+        ],
+    )
     assert await _decision_status(ctx, decision_id) == "reflected"
 
     # Commit drift — no explicit link_commit call (simulates hook silence).
-    _commit_edit(repo_root, """
+    _commit_edit(
+        repo_root,
+        """
         def fetch_user(user_id: int):
             # Audit log bypassed.
             raise NotImplementedError
-        """, msg="bypass-audit-log")
+        """,
+        msg="bypass-audit-log",
+    )
 
     # ensure_ledger_synced must detect the new commit and sync.
     invalidate_sync_cache(ctx)
@@ -379,16 +451,18 @@ async def test_new_ingest_enters_as_proposal(alpha_env):
     payload = {
         "query": "Pagination defaults to 25 items per page.",
         "repo": "jacob-repo",
-        "mappings": [{
-            "intent": "Pagination defaults to 25 items per page.",
-            "span": {
-                "source_type": "transcript",
-                "text": "Pagination defaults to 25 items per page.",
-                "source_ref": "jacob-v0.7-test",
-            },
-            "symbols": [],
-            "code_regions": [],
-        }],
+        "mappings": [
+            {
+                "intent": "Pagination defaults to 25 items per page.",
+                "span": {
+                    "source_type": "transcript",
+                    "text": "Pagination defaults to 25 items per page.",
+                    "source_ref": "jacob-v0.7-test",
+                },
+                "symbols": [],
+                "code_regions": [],
+            }
+        ],
     }
     ingest_resp = await handle_ingest(ctx, payload)
     assert ingest_resp.ingested
@@ -397,14 +471,12 @@ async def test_new_ingest_enters_as_proposal(alpha_env):
     # Code-compliance status is 'ungrounded' (no regions bound yet).
     # Human-approval axis lives on signoff.state = 'proposed'.
     status = await _decision_status(ctx, decision_id)
-    assert status == "ungrounded", (
-        f"v0.9+ invariant FAIL: expected 'ungrounded', got '{status}'"
-    )
+    assert status == "ungrounded", f"v0.9+ invariant FAIL: expected 'ungrounded', got '{status}'"
 
     # After ratification, it remains ungrounded (no code regions bound).
     from handlers.ratify import handle_ratify
-    ratify_resp = await handle_ratify(ctx, decision_id=decision_id,
-                                     signer="jacob@example.com")
+
+    ratify_resp = await handle_ratify(ctx, decision_id=decision_id, signer="jacob@example.com")
     assert ratify_resp.was_new is True
     assert ratify_resp.signoff["state"] == "ratified"
 
@@ -428,22 +500,28 @@ async def test_ratify_idempotent(alpha_env):
     original signer and ratified_at timestamp must be preserved.
     """
     from handlers.ratify import handle_ratify
+
     ctx, _ = alpha_env
 
-    ingest_resp = await handle_ingest(ctx, {
-        "query": "Cache TTL is 5 minutes.",
-        "repo": "jacob-repo",
-        "mappings": [{
-            "intent": "Cache TTL is 5 minutes.",
-            "span": {
-                "source_type": "transcript",
-                "text": "Cache TTL is 5 minutes.",
-                "source_ref": "arch-review",
-            },
-            "symbols": [],
-            "code_regions": [],
-        }],
-    })
+    ingest_resp = await handle_ingest(
+        ctx,
+        {
+            "query": "Cache TTL is 5 minutes.",
+            "repo": "jacob-repo",
+            "mappings": [
+                {
+                    "intent": "Cache TTL is 5 minutes.",
+                    "span": {
+                        "source_type": "transcript",
+                        "text": "Cache TTL is 5 minutes.",
+                        "source_ref": "arch-review",
+                    },
+                    "symbols": [],
+                    "code_regions": [],
+                }
+            ],
+        },
+    )
     assert ingest_resp.ingested
     decision_id = ingest_resp.pending_grounding_decisions[0]["decision_id"]
 
@@ -457,4 +535,4 @@ async def test_ratify_idempotent(alpha_env):
     assert resp2.was_new is False
     assert resp2.signoff["state"] == "ratified"
     assert resp2.signoff["signer"] == "jin@example.com"  # original signer preserved
-    assert resp2.signoff["ratified_at"] == ratified_at   # timestamp unchanged
+    assert resp2.signoff["ratified_at"] == ratified_at  # timestamp unchanged
diff --git a/tests/test_ast_diff.py b/tests/test_ast_diff.py
index 1c2ddbec..89a57882 100644
--- a/tests/test_ast_diff.py
+++ b/tests/test_ast_diff.py
@@ -7,13 +7,13 @@
 bias the V2 caller-LLM verdict prompt toward "looks fine" on
 behaviorally-different code.
 """
+
 from __future__ import annotations
 
 import pytest
 
 from ledger.ast_diff import is_cosmetic_change
 
-
 # ── Whitelist: must return True ─────────────────────────────────────
 
 
diff --git a/tests/test_b2_cosmetic_hint.py b/tests/test_b2_cosmetic_hint.py
index 41953ec9..3132cb6a 100644
--- a/tests/test_b2_cosmetic_hint.py
+++ b/tests/test_b2_cosmetic_hint.py
@@ -9,6 +9,7 @@
   - cosmetic_hint stays False for renames / docstring edits / etc.
   - cosmetic_hint=True only for whitespace-only diffs
 """
+
 from __future__ import annotations
 
 from pathlib import Path
@@ -44,6 +45,7 @@ def repo_with_baseline(tmp_path):
     the working-tree file to whatever they need to compare against HEAD.
     """
     import subprocess
+
     repo = tmp_path / "repo"
     repo.mkdir()
     subprocess.run(["git", "init", "-q"], cwd=repo, check=True)
@@ -80,6 +82,7 @@ def test_docstring_edit_keeps_cosmetic_hint_false(repo_with_baseline, tmp_path):
     _write_file(repo, rel, "def f(x):\n    return x + 1\n")
     # Now overwrite baseline by committing a docstring-only version, then edit working tree.
     import subprocess
+
     _write_file(repo, rel, 'def f(x):\n    """Old."""\n    return x + 1\n')
     subprocess.run(["git", "add", "-A"], cwd=repo, check=True)
     subprocess.run(["git", "commit", "-q", "-m", "add docstring"], cwd=repo, check=True)
@@ -111,6 +114,7 @@ def test_no_diff_keeps_cosmetic_hint_false(repo_with_baseline):
 def test_unsupported_extension_keeps_cosmetic_hint_false(tmp_path):
     """Files outside EXTENSION_LANGUAGE never get a hint."""
     import subprocess
+
     repo = tmp_path / "repo2"
     repo.mkdir()
     subprocess.run(["git", "init", "-q"], cwd=repo, check=True)
diff --git a/tests/test_bind.py b/tests/test_bind.py
index bec9e988..b264ed44 100644
--- a/tests/test_bind.py
+++ b/tests/test_bind.py
@@ -8,6 +8,7 @@
 5. test_bind_idempotent — calling bind twice for same (decision, region) is a no-op
 6. test_bind_status_transition — after bind, decision status transitions to "pending"
 """
+
 from __future__ import annotations
 
 from unittest.mock import AsyncMock, patch
@@ -18,7 +19,6 @@
 from ledger.client import LedgerClient
 from ledger.schema import init_schema, migrate
 
-
 # ── Fixtures ──────────────────────────────────────────────────────────────────
 
 
@@ -57,6 +57,7 @@ async def test_bind_success_with_explicit_lines():
     client = await _fresh_client()
     try:
         from ledger.adapter import SurrealDBLedgerAdapter
+
         adapter = SurrealDBLedgerAdapter(url="memory://")
         adapter._client = client
         adapter._connected = True
@@ -64,14 +65,19 @@ async def test_bind_success_with_explicit_lines():
         decision_id = await _seed_decision(client, "Use BM25 for search")
         ctx = _StubCtx(adapter)
 
-        resp = await handle_bind(ctx, bindings=[{
-            "decision_id": decision_id,
-            "file_path": "server.py",
-            "symbol_name": "handle_search",
-            "start_line": 10,
-            "end_line": 30,
-            "purpose": "search handler",
-        }])
+        resp = await handle_bind(
+            ctx,
+            bindings=[
+                {
+                    "decision_id": decision_id,
+                    "file_path": "server.py",
+                    "symbol_name": "handle_search",
+                    "start_line": 10,
+                    "end_line": 30,
+                    "purpose": "search handler",
+                }
+            ],
+        )
 
         assert len(resp.bindings) == 1
         b = resp.bindings[0]
@@ -94,6 +100,7 @@ async def test_bind_symbol_resolution():
     client = await _fresh_client()
     try:
         from ledger.adapter import SurrealDBLedgerAdapter
+
         adapter = SurrealDBLedgerAdapter(url="memory://")
         adapter._client = client
         adapter._connected = True
@@ -102,11 +109,16 @@ async def test_bind_symbol_resolution():
         ctx = _StubCtx(adapter)
 
         with patch("ledger.status.resolve_symbol_lines", return_value=(5, 25)):
-            resp = await handle_bind(ctx, bindings=[{
-                "decision_id": decision_id,
-                "file_path": "middleware.py",
-                "symbol_name": "rate_limit",
-            }])
+            resp = await handle_bind(
+                ctx,
+                bindings=[
+                    {
+                        "decision_id": decision_id,
+                        "file_path": "middleware.py",
+                        "symbol_name": "rate_limit",
+                    }
+                ],
+            )
 
         assert len(resp.bindings) == 1
         b = resp.bindings[0]
@@ -126,6 +138,7 @@ async def test_bind_unknown_decision_id():
     client = await _fresh_client()
     try:
         from ledger.adapter import SurrealDBLedgerAdapter
+
         adapter = SurrealDBLedgerAdapter(url="memory://")
         adapter._client = client
         adapter._connected = True
@@ -133,13 +146,18 @@ async def test_bind_unknown_decision_id():
         ctx = _StubCtx(adapter)
         fake_id = "decision:fake_does_not_exist_xyz"
 
-        resp = await handle_bind(ctx, bindings=[{
-            "decision_id": fake_id,
-            "file_path": "server.py",
-            "symbol_name": "some_func",
-            "start_line": 1,
-            "end_line": 10,
-        }])
+        resp = await handle_bind(
+            ctx,
+            bindings=[
+                {
+                    "decision_id": fake_id,
+                    "file_path": "server.py",
+                    "symbol_name": "some_func",
+                    "start_line": 1,
+                    "end_line": 10,
+                }
+            ],
+        )
 
         assert len(resp.bindings) == 1
         b = resp.bindings[0]
@@ -159,6 +177,7 @@ async def test_bind_symbol_not_found():
     client = await _fresh_client()
     try:
         from ledger.adapter import SurrealDBLedgerAdapter
+
         adapter = SurrealDBLedgerAdapter(url="memory://")
         adapter._client = client
         adapter._connected = True
@@ -167,11 +186,16 @@ async def test_bind_symbol_not_found():
         ctx = _StubCtx(adapter)
 
         with patch("ledger.status.resolve_symbol_lines", return_value=None):
-            resp = await handle_bind(ctx, bindings=[{
-                "decision_id": decision_id,
-                "file_path": "cache.py",
-                "symbol_name": "evict_stale",
-            }])
+            resp = await handle_bind(
+                ctx,
+                bindings=[
+                    {
+                        "decision_id": decision_id,
+                        "file_path": "cache.py",
+                        "symbol_name": "evict_stale",
+                    }
+                ],
+            )
 
         assert len(resp.bindings) == 1
         b = resp.bindings[0]
@@ -191,6 +215,7 @@ async def test_bind_idempotent():
     client = await _fresh_client()
     try:
         from ledger.adapter import SurrealDBLedgerAdapter
+
         adapter = SurrealDBLedgerAdapter(url="memory://")
         adapter._client = client
         adapter._connected = True
@@ -227,6 +252,7 @@ async def test_bind_status_transition():
     client = await _fresh_client()
     try:
         from ledger.adapter import SurrealDBLedgerAdapter
+
         adapter = SurrealDBLedgerAdapter(url="memory://")
         adapter._client = client
         adapter._connected = True
@@ -235,25 +261,26 @@ async def test_bind_status_transition():
         ctx = _StubCtx(adapter)
 
         # Verify starting status is ungrounded
-        rows = await client.query(
-            f"SELECT status FROM {decision_id} LIMIT 1"
-        )
+        rows = await client.query(f"SELECT status FROM {decision_id} LIMIT 1")
         assert rows and rows[0].get("status") == "ungrounded"
 
-        resp = await handle_bind(ctx, bindings=[{
-            "decision_id": decision_id,
-            "file_path": "pagination.py",
-            "symbol_name": "paginate",
-            "start_line": 1,
-            "end_line": 15,
-        }])
+        resp = await handle_bind(
+            ctx,
+            bindings=[
+                {
+                    "decision_id": decision_id,
+                    "file_path": "pagination.py",
+                    "symbol_name": "paginate",
+                    "start_line": 1,
+                    "end_line": 15,
+                }
+            ],
+        )
 
         assert resp.bindings[0].error is None
 
         # Status should now be "pending"
-        rows = await client.query(
-            f"SELECT status FROM {decision_id} LIMIT 1"
-        )
+        rows = await client.query(f"SELECT status FROM {decision_id} LIMIT 1")
         assert rows and rows[0].get("status") == "pending"
     finally:
         await client.close()
diff --git a/tests/test_codegenome_adapter.py b/tests/test_codegenome_adapter.py
index ce01128c..3df5bb42 100644
--- a/tests/test_codegenome_adapter.py
+++ b/tests/test_codegenome_adapter.py
@@ -22,7 +22,6 @@
     DeterministicCodeGenomeAdapter,
 )
 
-
 # ── Phase 1: ABC + dataclasses ──────────────────────────────────────────────
 
 
diff --git a/tests/test_codegenome_bind_integration.py b/tests/test_codegenome_bind_integration.py
index 6bdaae26..0854b407 100644
--- a/tests/test_codegenome_bind_integration.py
+++ b/tests/test_codegenome_bind_integration.py
@@ -56,7 +56,9 @@ def __init__(self, ledger, *, write_identity_records):
 def _stub_bind_dependencies(content_hash="abc123"):
     stack = ExitStack()
     stack.enter_context(patch("ledger.adapter.compute_content_hash", return_value=content_hash))
-    stack.enter_context(patch("ledger.status.get_git_content", return_value="def foo():\n    return 1\n"))
+    stack.enter_context(
+        patch("ledger.status.get_git_content", return_value="def foo():\n    return 1\n")
+    )
     stack.enter_context(patch("ledger.status.hash_lines", return_value=content_hash))
     return stack
 
@@ -75,13 +77,18 @@ async def test_bind_with_flag_off_writes_no_identity():
         ctx = _CtxWithCodegenome(adapter, write_identity_records=False)
 
         with _stub_bind_dependencies(content_hash="hash_off"):
-            resp = await handle_bind(ctx, bindings=[{
-                "decision_id": decision_id,
-                "file_path": "server.py",
-                "symbol_name": "handle_search",
-                "start_line": 10,
-                "end_line": 30,
-            }])
+            resp = await handle_bind(
+                ctx,
+                bindings=[
+                    {
+                        "decision_id": decision_id,
+                        "file_path": "server.py",
+                        "symbol_name": "handle_search",
+                        "start_line": 10,
+                        "end_line": 30,
+                    }
+                ],
+            )
 
         assert len(resp.bindings) == 1
         assert resp.bindings[0].error is None
@@ -111,13 +118,18 @@ async def test_bind_with_flag_on_writes_identity_and_links_decision():
 
         fixed_hash = "deadbeefcafe1234"
         with _stub_bind_dependencies(content_hash=fixed_hash):
-            resp = await handle_bind(ctx, bindings=[{
-                "decision_id": decision_id,
-                "file_path": "checkout/rate_limit.py",
-                "symbol_name": "enforce_checkout_rate_limit",
-                "start_line": 24,
-                "end_line": 67,
-            }])
+            resp = await handle_bind(
+                ctx,
+                bindings=[
+                    {
+                        "decision_id": decision_id,
+                        "file_path": "checkout/rate_limit.py",
+                        "symbol_name": "enforce_checkout_rate_limit",
+                        "start_line": 24,
+                        "end_line": 67,
+                    }
+                ],
+            )
 
         assert len(resp.bindings) == 1
         bind_result = resp.bindings[0]
@@ -189,17 +201,26 @@ async def test_codegenome_failure_does_not_change_bind_response():
         decision_id = await _seed_decision(client, "x")
         ctx = _CtxWithCodegenome(adapter, write_identity_records=True)
 
-        with patch.object(
-            ctx.codegenome, "compute_identity",
-            side_effect=RuntimeError("simulated codegenome failure"),
-        ), _stub_bind_dependencies(content_hash="h2"):
-            resp = await handle_bind(ctx, bindings=[{
-                "decision_id": decision_id,
-                "file_path": "a.py",
-                "symbol_name": "f",
-                "start_line": 1,
-                "end_line": 5,
-            }])
+        with (
+            patch.object(
+                ctx.codegenome,
+                "compute_identity",
+                side_effect=RuntimeError("simulated codegenome failure"),
+            ),
+            _stub_bind_dependencies(content_hash="h2"),
+        ):
+            resp = await handle_bind(
+                ctx,
+                bindings=[
+                    {
+                        "decision_id": decision_id,
+                        "file_path": "a.py",
+                        "symbol_name": "f",
+                        "start_line": 1,
+                        "end_line": 5,
+                    }
+                ],
+            )
 
         assert len(resp.bindings) == 1
         assert resp.bindings[0].error is None
diff --git a/tests/test_codegenome_confidence.py b/tests/test_codegenome_confidence.py
index cad66c28..80d3a835 100644
--- a/tests/test_codegenome_confidence.py
+++ b/tests/test_codegenome_confidence.py
@@ -8,7 +8,6 @@
 
 from codegenome.confidence import noisy_or, weighted_average
 
-
 # ── noisy_or ────────────────────────────────────────────────────────────────
 
 
diff --git a/tests/test_codegenome_config.py b/tests/test_codegenome_config.py
index 3a25e9b9..6fc8699d 100644
--- a/tests/test_codegenome_config.py
+++ b/tests/test_codegenome_config.py
@@ -6,7 +6,6 @@
 
 from codegenome.config import CodeGenomeConfig
 
-
 _ALL_FLAGS = (
     "BICAMERAL_CODEGENOME_ENABLED",
     "BICAMERAL_CODEGENOME_WRITE_IDENTITY_RECORDS",
@@ -67,6 +66,10 @@ def test_identity_writes_active_requires_both_flags():
     assert CodeGenomeConfig().identity_writes_active() is False
     assert CodeGenomeConfig(enabled=True).identity_writes_active() is False
     assert CodeGenomeConfig(write_identity_records=True).identity_writes_active() is False
-    assert CodeGenomeConfig(
-        enabled=True, write_identity_records=True,
-    ).identity_writes_active() is True
+    assert (
+        CodeGenomeConfig(
+            enabled=True,
+            write_identity_records=True,
+        ).identity_writes_active()
+        is True
+    )
diff --git a/tests/test_codegenome_l1_exemption.py b/tests/test_codegenome_l1_exemption.py
index 9baf8796..0605b741 100644
--- a/tests/test_codegenome_l1_exemption.py
+++ b/tests/test_codegenome_l1_exemption.py
@@ -24,7 +24,6 @@
 from ledger.client import LedgerClient
 from ledger.schema import init_schema, migrate
 
-
 # ── Fixtures ────────────────────────────────────────────────────────────────
 
 
@@ -60,7 +59,8 @@ def __init__(self, ledger):
         self.codegenome = DeterministicCodeGenomeAdapter(repo_path=self.repo_path)
         # Both flags ON — L1 guard is the only thing that should suppress writes.
         self.codegenome_config = CodeGenomeConfig(
-            enabled=True, write_identity_records=True,
+            enabled=True,
+            write_identity_records=True,
         )
 
 
@@ -97,12 +97,18 @@ async def test_bind_l2_writes_identity():
         ctx = _CtxWithCodegenome(adapter)
 
         with _stub_bind_dependencies("h_l2"):
-            resp = await handle_bind(ctx, bindings=[{
-                "decision_id": decision_id,
-                "file_path": "ledger/client.py",
-                "symbol_name": "WALWriter",
-                "start_line": 10, "end_line": 30,
-            }])
+            resp = await handle_bind(
+                ctx,
+                bindings=[
+                    {
+                        "decision_id": decision_id,
+                        "file_path": "ledger/client.py",
+                        "symbol_name": "WALWriter",
+                        "start_line": 10,
+                        "end_line": 30,
+                    }
+                ],
+            )
         assert resp.bindings[0].error is None
 
         cs, si, ab = await _count_codegenome_rows(client)
@@ -131,17 +137,25 @@ async def test_bind_l1_skips_codegenome_writes():
         adapter._client = client
         adapter._connected = True
         decision_id = await _seed_decision(
-            client, description="Users can pause subscription for 90 days", level="L1",
+            client,
+            description="Users can pause subscription for 90 days",
+            level="L1",
         )
         ctx = _CtxWithCodegenome(adapter)
 
         with _stub_bind_dependencies("h_l1"):
-            resp = await handle_bind(ctx, bindings=[{
-                "decision_id": decision_id,
-                "file_path": "subscriptions/pause.py",
-                "symbol_name": "pause_subscription",
-                "start_line": 1, "end_line": 20,
-            }])
+            resp = await handle_bind(
+                ctx,
+                bindings=[
+                    {
+                        "decision_id": decision_id,
+                        "file_path": "subscriptions/pause.py",
+                        "symbol_name": "pause_subscription",
+                        "start_line": 1,
+                        "end_line": 20,
+                    }
+                ],
+            )
         # Bind itself succeeds (binds_to + code_region still written —
         # the bind contract is unchanged). Only the codegenome
         # side-effect is suppressed.
@@ -168,16 +182,25 @@ async def test_bind_l3_skips_codegenome_writes():
         adapter._client = client
         adapter._connected = True
         decision_id = await _seed_decision(
-            client, description="Loop unroll factor 4 in hot path", level="L3",
+            client,
+            description="Loop unroll factor 4 in hot path",
+            level="L3",
         )
         ctx = _CtxWithCodegenome(adapter)
 
         with _stub_bind_dependencies("h_l3"):
-            await handle_bind(ctx, bindings=[{
-                "decision_id": decision_id,
-                "file_path": "vm/eval.py", "symbol_name": "eval_loop",
-                "start_line": 100, "end_line": 200,
-            }])
+            await handle_bind(
+                ctx,
+                bindings=[
+                    {
+                        "decision_id": decision_id,
+                        "file_path": "vm/eval.py",
+                        "symbol_name": "eval_loop",
+                        "start_line": 100,
+                        "end_line": 200,
+                    }
+                ],
+            )
 
         cs, si, ab = await _count_codegenome_rows(client)
         assert (cs, si, ab) == (0, 0, 0)
@@ -202,16 +225,25 @@ async def test_bind_unclassified_decision_level_skips_codegenome_writes():
         adapter._client = client
         adapter._connected = True
         decision_id = await _seed_decision(
-            client, description="legacy ungrouped decision", level=None,
+            client,
+            description="legacy ungrouped decision",
+            level=None,
         )
         ctx = _CtxWithCodegenome(adapter)
 
         with _stub_bind_dependencies("h_null"):
-            await handle_bind(ctx, bindings=[{
-                "decision_id": decision_id,
-                "file_path": "x.py", "symbol_name": "x",
-                "start_line": 1, "end_line": 5,
-            }])
+            await handle_bind(
+                ctx,
+                bindings=[
+                    {
+                        "decision_id": decision_id,
+                        "file_path": "x.py",
+                        "symbol_name": "x",
+                        "start_line": 1,
+                        "end_line": 5,
+                    }
+                ],
+            )
 
         cs, si, ab = await _count_codegenome_rows(client)
         assert (cs, si, ab) == (0, 0, 0)
@@ -234,16 +266,25 @@ async def test_bind_response_shape_unchanged_for_l1():
         adapter._client = client
         adapter._connected = True
         decision_id = await _seed_decision(
-            client, description="Members can pause subscription", level="L1",
+            client,
+            description="Members can pause subscription",
+            level="L1",
         )
         ctx = _CtxWithCodegenome(adapter)
 
         with _stub_bind_dependencies("h_shape"):
-            resp = await handle_bind(ctx, bindings=[{
-                "decision_id": decision_id,
-                "file_path": "src/x.py", "symbol_name": "x",
-                "start_line": 1, "end_line": 5,
-            }])
+            resp = await handle_bind(
+                ctx,
+                bindings=[
+                    {
+                        "decision_id": decision_id,
+                        "file_path": "src/x.py",
+                        "symbol_name": "x",
+                        "start_line": 1,
+                        "end_line": 5,
+                    }
+                ],
+            )
 
         bind = resp.bindings[0]
         assert bind.error is None
diff --git a/tests/test_compliance_cache_semantics.py b/tests/test_compliance_cache_semantics.py
index d900af0c..9606ad49 100644
--- a/tests/test_compliance_cache_semantics.py
+++ b/tests/test_compliance_cache_semantics.py
@@ -6,6 +6,7 @@
 - Seeding a compliance_check row via resolve_compliance (simulated here by
   direct write) promotes the decision out of PENDING
 """
+
 from __future__ import annotations
 
 import pytest
@@ -15,7 +16,6 @@
 from ledger.schema import init_schema, migrate
 from ledger.status import derive_status
 
-
 # ── Pure unit tests: derive_status decision table ────────────────────
 
 
diff --git a/tests/test_compliance_check_schema.py b/tests/test_compliance_check_schema.py
index b1409af4..7c55e92c 100644
--- a/tests/test_compliance_check_schema.py
+++ b/tests/test_compliance_check_schema.py
@@ -11,6 +11,7 @@
 These tests pin the fields, the enum constraints, the defaults, and the
 UNIQUE cache-key index. They run against memory:// for hermetic isolation.
 """
+
 from __future__ import annotations
 
 import pytest
@@ -165,9 +166,7 @@ async def test_phase_accepts_all_five_reserved_values():
     """
     c = await _fresh_client()
     try:
-        for i, phase in enumerate(
-            ("ingest", "drift", "regrounding", "supersession", "divergence")
-        ):
+        for i, phase in enumerate(("ingest", "drift", "regrounding", "supersession", "divergence")):
             await c.execute(
                 "CREATE compliance_check SET decision_id = $i, region_id = $r, "
                 "content_hash = $h, verdict = 'compliant', confidence = 'high', "
@@ -298,10 +297,7 @@ async def test_init_schema_is_idempotent_against_existing_db():
         await init_schema(c)
 
         # Sanity: schema still works after repeated inits.
-        await c.execute(
-            "CREATE intent SET description = 'init-idem test', "
-            "source_type = 'manual'"
-        )
+        await c.execute("CREATE intent SET description = 'init-idem test', source_type = 'manual'")
         rows = await c.query("SELECT description FROM intent")
         assert len(rows) == 1
         assert rows[0]["description"] == "init-idem test"
diff --git a/tests/test_consent_notice.py b/tests/test_consent_notice.py
index caced0e9..1682173d 100644
--- a/tests/test_consent_notice.py
+++ b/tests/test_consent_notice.py
@@ -13,7 +13,9 @@
 
 def _reload_consent():
     import importlib
+
     import consent
+
     importlib.reload(consent)
     return consent
 
@@ -21,7 +23,9 @@ def _reload_consent():
 # ── telemetry_allowed() — gating behavior ──────────────────────────────
 
 
-def test_telemetry_allowed_no_marker_default_on(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+def test_telemetry_allowed_no_marker_default_on(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
     """No marker: default-on (preserves upgrade-path behavior)."""
     monkeypatch.setenv("HOME", str(tmp_path))
     monkeypatch.setenv("USERPROFILE", str(tmp_path))
@@ -30,7 +34,9 @@ def test_telemetry_allowed_no_marker_default_on(tmp_path: Path, monkeypatch: pyt
     assert consent.telemetry_allowed() is True
 
 
-def test_telemetry_allowed_env_off_overrides_marker(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+def test_telemetry_allowed_env_off_overrides_marker(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
     """Env BICAMERAL_TELEMETRY=0 wins even when marker says enabled."""
     monkeypatch.setenv("HOME", str(tmp_path))
     monkeypatch.setenv("USERPROFILE", str(tmp_path))
@@ -137,7 +143,14 @@ def test_notice_re_emitted_on_policy_version_bump(
     # Simulate a stale marker (older policy version).
     (tmp_path / ".bicameral").mkdir(parents=True, exist_ok=True)
     (tmp_path / ".bicameral" / "consent.json").write_text(
-        json.dumps({"telemetry": "enabled", "policy_version": 0, "acknowledged_at": "x", "acknowledged_via": "wizard"}),
+        json.dumps(
+            {
+                "telemetry": "enabled",
+                "policy_version": 0,
+                "acknowledged_at": "x",
+                "acknowledged_via": "wizard",
+            }
+        ),
         encoding="utf-8",
     )
 
@@ -170,7 +183,9 @@ def test_notice_swallows_marker_write_failure(
     monkeypatch.setenv("USERPROFILE", str(tmp_path))
     monkeypatch.delenv("BICAMERAL_SKIP_CONSENT_NOTICE", raising=False)
     consent = _reload_consent()
-    monkeypatch.setattr(consent, "write_consent", lambda *a, **kw: (_ for _ in ()).throw(OSError("disk full")))
+    monkeypatch.setattr(
+        consent, "write_consent", lambda *a, **kw: (_ for _ in ()).throw(OSError("disk full"))
+    )
     # Must not raise.
     consent.notify_if_first_run()
 
@@ -186,7 +201,9 @@ def test_telemetry_send_event_blocked_when_consent_disabled(
     consent.write_consent(telemetry=False, via="wizard")
 
     import importlib
+
     import telemetry
+
     importlib.reload(telemetry)
 
     # Patch the network path; if relay was attempted, this would be called.
@@ -195,6 +212,7 @@ def test_telemetry_send_event_blocked_when_consent_disabled(
     telemetry.send_event("0.13.3", skill="bicameral-ingest", duration_ms=100)
     # Counter should still increment locally.
     import local_counters
+
     importlib.reload(local_counters)
     # Relay was NOT called (consent denied).
     assert sent == []
diff --git a/tests/test_desync_scenarios.py b/tests/test_desync_scenarios.py
index c70a88c3..2b04ef03 100644
--- a/tests/test_desync_scenarios.py
+++ b/tests/test_desync_scenarios.py
@@ -30,6 +30,7 @@
 than via server-side magic. Scenarios depending on V2-only tools
 (``bicameral_rebind``, ``record_compliance_verdict``) are marked xfail.
 """
+
 from __future__ import annotations
 
 import subprocess
@@ -45,7 +46,6 @@
 from handlers.ingest import handle_ingest
 from handlers.link_commit import handle_link_commit, invalidate_sync_cache
 
-
 # ── Helpers ──────────────────────────────────────────────────────────
 
 
@@ -109,16 +109,19 @@ def _scenario_repo(monkeypatch, tmp_path):
     monkeypatch.setenv("USE_REAL_LEDGER", "1")
     monkeypatch.setenv("SURREAL_URL", "memory://")
     repo = tmp_path / "repo"
-    _seed_repo(repo, {
-        "src/payments.py": """
+    _seed_repo(
+        repo,
+        {
+            "src/payments.py": """
             def calculate_discount(order_total: float) -> float:
                 return order_total * 0.1
         """,
-        "src/auth.py": """
+            "src/auth.py": """
             def verify_token(token: str) -> bool:
                 return token.startswith("valid:")
         """,
-    })
+        },
+    )
     monkeypatch.setenv("REPO_PATH", str(repo))
     monkeypatch.setenv("BICAMERAL_AUTHORITATIVE_REF", "main")
     monkeypatch.chdir(repo)
@@ -156,11 +159,16 @@ async def test_scenario_01_new_decision_with_existing_code(_scenario_repo):
     assert ungrounded, f"Expected ungrounded grounding check, got: {lc.pending_grounding_checks}"
     decision_id = ungrounded[0]["decision_id"]
 
-    bind_resp = await handle_bind(ctx, [{
-        "decision_id": decision_id,
-        "file_path": "src/payments.py",
-        "symbol_name": "calculate_discount",
-    }])
+    bind_resp = await handle_bind(
+        ctx,
+        [
+            {
+                "decision_id": decision_id,
+                "file_path": "src/payments.py",
+                "symbol_name": "calculate_discount",
+            }
+        ],
+    )
     assert bind_resp.bindings
     assert not bind_resp.bindings[0].error, bind_resp.bindings[0].error
 
@@ -184,14 +192,16 @@ async def test_scenario_02_code_changed_after_grounded_pending_until_verdict(_sc
         _scenario_repo,
         text="Apply discount",
         intent="Apply 10% discount",
-        code_regions=[{
-            "file_path": "src/payments.py",
-            "symbol": "calculate_discount",
-            "start_line": 1,
-            "end_line": 2,
-            "type": "function",
-            "purpose": "discount calc",
-        }],
+        code_regions=[
+            {
+                "file_path": "src/payments.py",
+                "symbol": "calculate_discount",
+                "start_line": 1,
+                "end_line": 2,
+                "type": "function",
+                "purpose": "discount calc",
+            }
+        ],
     )
     await handle_ingest(ctx, payload)
 
@@ -230,12 +240,16 @@ async def test_scenario_03_code_deleted_after_grounded_pending(_scenario_repo):
         _scenario_repo,
         text="Apply discount",
         intent="Apply 10% discount",
-        code_regions=[{
-            "file_path": "src/payments.py",
-            "symbol": "calculate_discount",
-            "start_line": 1, "end_line": 2,
-            "type": "function", "purpose": "discount calc",
-        }],
+        code_regions=[
+            {
+                "file_path": "src/payments.py",
+                "symbol": "calculate_discount",
+                "start_line": 1,
+                "end_line": 2,
+                "type": "function",
+                "purpose": "discount calc",
+            }
+        ],
     )
     await handle_ingest(ctx, payload)
 
@@ -245,7 +259,9 @@ async def test_scenario_03_code_deleted_after_grounded_pending(_scenario_repo):
     lc = await handle_link_commit(ctx, "HEAD")
 
     # Symbol disappeared on authoritative ref.
-    disappeared = [c for c in lc.pending_grounding_checks if c.get("reason") == "symbol_disappeared"]
+    disappeared = [
+        c for c in lc.pending_grounding_checks if c.get("reason") == "symbol_disappeared"
+    ]
     assert disappeared, f"Expected symbol_disappeared check, got: {lc.pending_grounding_checks}"
 
 
@@ -258,12 +274,16 @@ async def test_scenario_04_symbol_renamed_in_file(_scenario_repo):
         _scenario_repo,
         text="Apply discount",
         intent="Apply 10% discount",
-        code_regions=[{
-            "file_path": "src/payments.py",
-            "symbol": "calculate_discount",
-            "start_line": 1, "end_line": 2,
-            "type": "function", "purpose": "discount calc",
-        }],
+        code_regions=[
+            {
+                "file_path": "src/payments.py",
+                "symbol": "calculate_discount",
+                "start_line": 1,
+                "end_line": 2,
+                "type": "function",
+                "purpose": "discount calc",
+            }
+        ],
     )
     await handle_ingest(ctx, payload)
 
@@ -274,7 +294,9 @@ async def test_scenario_04_symbol_renamed_in_file(_scenario_repo):
     invalidate_sync_cache(ctx)
     lc = await handle_link_commit(ctx, "HEAD")
 
-    disappeared = [c for c in lc.pending_grounding_checks if c.get("reason") == "symbol_disappeared"]
+    disappeared = [
+        c for c in lc.pending_grounding_checks if c.get("reason") == "symbol_disappeared"
+    ]
     assert disappeared, f"Expected symbol_disappeared, got: {lc.pending_grounding_checks}"
     assert disappeared[0]["symbol"] == "calculate_discount"
     # V1 D1: original_lines is part of the payload.
@@ -290,12 +312,16 @@ async def test_scenario_05_symbol_moved_to_different_file(_scenario_repo):
         _scenario_repo,
         text="Apply discount",
         intent="Apply 10% discount",
-        code_regions=[{
-            "file_path": "src/payments.py",
-            "symbol": "calculate_discount",
-            "start_line": 1, "end_line": 2,
-            "type": "function", "purpose": "discount calc",
-        }],
+        code_regions=[
+            {
+                "file_path": "src/payments.py",
+                "symbol": "calculate_discount",
+                "start_line": 1,
+                "end_line": 2,
+                "type": "function",
+                "purpose": "discount calc",
+            }
+        ],
     )
     await handle_ingest(ctx, payload)
 
@@ -307,8 +333,12 @@ async def test_scenario_05_symbol_moved_to_different_file(_scenario_repo):
     invalidate_sync_cache(ctx)
     lc = await handle_link_commit(ctx, "HEAD")
 
-    disappeared = [c for c in lc.pending_grounding_checks if c.get("reason") == "symbol_disappeared"]
-    assert disappeared, f"Expected symbol_disappeared on cross-file move, got: {lc.pending_grounding_checks}"
+    disappeared = [
+        c for c in lc.pending_grounding_checks if c.get("reason") == "symbol_disappeared"
+    ]
+    assert disappeared, (
+        f"Expected symbol_disappeared on cross-file move, got: {lc.pending_grounding_checks}"
+    )
 
 
 @pytest.mark.phase2
@@ -347,13 +377,18 @@ async def test_scenario_06_code_added_ungrounded_resolvable(_scenario_repo):
     # Pass explicit lines — ctx.authoritative_sha is captured at ctx
     # creation and is stale after the new commit, so resolve_symbol_lines
     # would look at the wrong ref. Explicit lines bypass resolution.
-    bind_resp = await handle_bind(ctx, [{
-        "decision_id": decision_id,
-        "file_path": "src/cart.py",
-        "symbol_name": "cart_total",
-        "start_line": 1,
-        "end_line": 2,
-    }])
+    bind_resp = await handle_bind(
+        ctx,
+        [
+            {
+                "decision_id": decision_id,
+                "file_path": "src/cart.py",
+                "symbol_name": "cart_total",
+                "start_line": 1,
+                "end_line": 2,
+            }
+        ],
+    )
     assert bind_resp.bindings and not bind_resp.bindings[0].error, (
         f"bind failed: {bind_resp.bindings[0].error if bind_resp.bindings else 'no result'}"
     )
@@ -413,24 +448,32 @@ async def test_scenario_09_intent_description_supersession(_scenario_repo):
         _scenario_repo,
         text="Apply discount",
         intent="Apply 10% discount on orders",
-        code_regions=[{
-            "file_path": "src/payments.py",
-            "symbol": "calculate_discount",
-            "start_line": 1, "end_line": 2,
-            "type": "function", "purpose": "discount calc",
-        }],
+        code_regions=[
+            {
+                "file_path": "src/payments.py",
+                "symbol": "calculate_discount",
+                "start_line": 1,
+                "end_line": 2,
+                "type": "function",
+                "purpose": "discount calc",
+            }
+        ],
         source_ref="meeting-1",
     )
     p2 = _build_payload(
         _scenario_repo,
         text="Apply discount with backoff",
         intent="Apply 15% discount on orders over $100",
-        code_regions=[{
-            "file_path": "src/payments.py",
-            "symbol": "calculate_discount",
-            "start_line": 1, "end_line": 2,
-            "type": "function", "purpose": "discount calc",
-        }],
+        code_regions=[
+            {
+                "file_path": "src/payments.py",
+                "symbol": "calculate_discount",
+                "start_line": 1,
+                "end_line": 2,
+                "type": "function",
+                "purpose": "discount calc",
+            }
+        ],
         source_ref="meeting-2",
     )
     r1 = await handle_ingest(ctx, p1)
@@ -446,17 +489,31 @@ async def test_scenario_10_multiple_intents_share_symbol(_scenario_repo):
     region = {
         "file_path": "src/auth.py",
         "symbol": "verify_token",
-        "start_line": 1, "end_line": 2,
-        "type": "function", "purpose": "auth check",
+        "start_line": 1,
+        "end_line": 2,
+        "type": "function",
+        "purpose": "auth check",
     }
-    await handle_ingest(ctx, _build_payload(
-        _scenario_repo, text="Verify JWT", intent="Use JWT verification",
-        code_regions=[region], source_ref="m1",
-    ))
-    await handle_ingest(ctx, _build_payload(
-        _scenario_repo, text="Reject invalid", intent="Reject malformed tokens",
-        code_regions=[region], source_ref="m2",
-    ))
+    await handle_ingest(
+        ctx,
+        _build_payload(
+            _scenario_repo,
+            text="Verify JWT",
+            intent="Use JWT verification",
+            code_regions=[region],
+            source_ref="m1",
+        ),
+    )
+    await handle_ingest(
+        ctx,
+        _build_payload(
+            _scenario_repo,
+            text="Reject invalid",
+            intent="Reject malformed tokens",
+            code_regions=[region],
+            source_ref="m2",
+        ),
+    )
     invalidate_sync_cache(ctx)
     drift = await handle_detect_drift(ctx, "src/auth.py")
     decision_ids = {d.decision_id for d in drift.decisions}
@@ -507,18 +564,25 @@ async def test_scenario_12_line_shift_does_not_trigger_drift(_scenario_repo):
     region = {
         "file_path": "src/auth.py",
         "symbol": "verify_token",
-        "start_line": 1, "end_line": 2,
-        "type": "function", "purpose": "auth check",
+        "start_line": 1,
+        "end_line": 2,
+        "type": "function",
+        "purpose": "auth check",
     }
-    await handle_ingest(ctx, _build_payload(
-        _scenario_repo, text="Use JWT", intent="JWT verification",
-        code_regions=[region],
-    ))
+    await handle_ingest(
+        ctx,
+        _build_payload(
+            _scenario_repo,
+            text="Use JWT",
+            intent="JWT verification",
+            code_regions=[region],
+        ),
+    )
 
     # Insert blank lines above — line numbers shift but the symbol bytes
     # are identical.
     (_scenario_repo / "src/auth.py").write_text(
-        "\n\n\ndef verify_token(token: str) -> bool:\n    return token.startswith(\"valid:\")\n"
+        '\n\n\ndef verify_token(token: str) -> bool:\n    return token.startswith("valid:")\n'
     )
     _commit(_scenario_repo, "insert blank lines above")
     invalidate_sync_cache(ctx)
@@ -526,7 +590,9 @@ async def test_scenario_12_line_shift_does_not_trigger_drift(_scenario_repo):
 
     drift = await handle_detect_drift(ctx, "src/auth.py")
     drifted = [d for d in drift.decisions if d.status == "drifted"]
-    assert not drifted, f"Line-shift edit must NOT trigger drift, got: {[(d.status, d.symbol, d.lines) for d in drift.decisions]}"
+    assert not drifted, (
+        f"Line-shift edit must NOT trigger drift, got: {[(d.status, d.symbol, d.lines) for d in drift.decisions]}"
+    )
 
 
 @pytest.mark.phase2
diff --git a/tests/test_e2e_asserters.py b/tests/test_e2e_asserters.py
new file mode 100644
index 00000000..e5ec4073
--- /dev/null
+++ b/tests/test_e2e_asserters.py
@@ -0,0 +1,176 @@
+"""Unit tests for the e2e flow asserters.
+
+Run the asserter functions in isolation against synthetic tool-call lists.
+Lets us pin behaviour like "Flow 1 accepts any commit-history-area file as
+a legitimate anchor for the bundled reorder/squash/amend/branch-from
+decision" without paying for a full claude-CLI e2e cycle.
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+E2E_DIR = Path(__file__).resolve().parents[1] / "tests" / "e2e"
+if str(E2E_DIR) not in sys.path:
+    sys.path.insert(0, str(E2E_DIR))  # index 0: searched before existing entries
+
+# Importing the orchestrator triggers env-var checks (DESKTOP_REPO_PATH etc.)
+# and CLI presence checks that we don't want to fire in unit tests. Stub them
+# before import so the module loads without bailing out.
+import os  # noqa: E402
+
+os.environ.setdefault("DESKTOP_REPO_PATH", str(Path(__file__).resolve().parent))
+os.environ.setdefault("PATH", os.environ.get("PATH", ""))  # only acts if PATH is unset
+
+import shutil  # noqa: E402
+
+_orig_which = shutil.which
+
+
+def _which_stub(name: str, *args, **kwargs):
+    """Return a fake path for ``claude`` / ``bicameral-mcp``; defer to the real which()."""
+    stubbed = name in ("claude", "bicameral-mcp")
+    return f"/stub/{name}" if stubbed else _orig_which(name, *args, **kwargs)
+
+
+shutil.which = _which_stub  # type: ignore[assignment]
+try:  # the stub is only active for the duration of this import
+    import run_e2e_flows  # noqa: E402
+finally:  # restore the real which() even if the import raises
+    shutil.which = _orig_which  # type: ignore[assignment]
+
+
+def _ingest_call(decisions: list[dict]) -> dict:
+    """Build a synthetic ``bicameral_ingest`` tool call wrapping ``decisions``."""
+    payload = {"decisions": decisions}
+    tool_input = {"payload": payload}
+    return {"name": "mcp__bicameral__bicameral_ingest", "input": tool_input}
+
+
+def _ratify_call(decision_id: str) -> dict:
+    """Build a synthetic ``bicameral_ratify`` tool call for ``decision_id``."""
+    call: dict = {"name": "mcp__bicameral__bicameral_ratify"}
+    call["input"] = {"decision_id": decision_id}
+    return call
+
+
+def _seed_calls(commit_history_anchor: str) -> list[dict]:
+    """Build the standard Flow 1 tool-call sequence.
+
+    One ingest call carries the three seed decisions with inline bindings,
+    followed by one ratify call per decision. ``commit_history_anchor`` is
+    the file bound to the bundled commit-history decision; tests vary it to
+    probe which area paths the asserter accepts.
+    """
+    # Decision description -> bound file, in the order they are ingested.
+    anchors = {
+        "High-signal notifications": "app/src/lib/stores/notifications-store.ts",
+        "Improved commit history": commit_history_anchor,
+        "Cherry-pick between branches": "app/src/lib/git/cherry-pick.ts",
+    }
+    decisions = [
+        {
+            "description": description,
+            "code_regions": [{"file_path": file_path}],
+        }
+        for description, file_path in anchors.items()
+    ]
+    # One ratify call per seeded decision, using fixed literal ids.
+    return [
+        _ingest_call(decisions),
+        *(_ratify_call(f"decision:{n}") for n in (1, 2, 3)),
+    ]
+
+
+# ── Flow 1: feature-area binding ────────────────────────────────────────
+
+
+def test_flow1_passes_with_canonical_git_layer_anchor():
+    """Baseline: the exact git-layer path that was required before still passes."""
+    tool_calls = _seed_calls("app/src/lib/git/reorder.ts")
+    ok, detail = run_e2e_flows.assert_flow_1(tool_calls)
+    assert ok, f"Flow 1 should pass with canonical reorder.ts anchor; detail: {detail}"
+
+
+def test_flow1_passes_with_ui_layer_anchor():
+    """Regression: binding the bundled commit-history decision to the UI-layer
+    commit-list.tsx used to fail the asserter and must now be accepted."""
+    tool_calls = _seed_calls("app/src/ui/history/commit-list.tsx")
+    ok, detail = run_e2e_flows.assert_flow_1(tool_calls)
+    assert ok, f"Flow 1 should accept commit-list.tsx as commit-history anchor; detail: {detail}"
+
+
+def test_flow1_passes_with_dispatcher_anchor():
+    """dispatcher.ts is accepted too; it backs the bundled amend and branch-from ops."""
+    tool_calls = _seed_calls("app/src/ui/dispatcher/dispatcher.ts")
+    ok, detail = run_e2e_flows.assert_flow_1(tool_calls)
+    assert ok, f"Flow 1 should accept dispatcher.ts as commit-history anchor; detail: {detail}"
+
+
+def test_flow1_passes_with_squash_anchor():
+    """squash.ts is accepted because the bundled decision covers drag-to-squash."""
+    tool_calls = _seed_calls("app/src/lib/git/squash.ts")
+    ok, detail = run_e2e_flows.assert_flow_1(tool_calls)
+    assert ok, f"Flow 1 should accept squash.ts as commit-history anchor; detail: {detail}"
+
+
+def test_flow1_fails_when_commit_history_unbound():
+    """A binding outside the commit-history area must be rejected by the asserter."""
+    tool_calls = _seed_calls("app/src/lib/some-unrelated-file.ts")
+    ok, detail = run_e2e_flows.assert_flow_1(tool_calls)
+    assert not ok, f"Flow 1 must fail when no commit-history-area file is bound; detail: {detail}"
+    assert "commit-history area" in detail
+
+
+def test_flow1_fails_when_cherry_pick_unbound():
+    """The asserter rejects a cherry-pick decision bound to an unrelated file."""
+    # Decision description -> bound file; only the cherry-pick binding is unrelated.
+    anchors = {
+        "High-signal notifications": "app/src/lib/stores/notifications-store.ts",
+        "Improved commit history": "app/src/lib/git/reorder.ts",
+        "Cherry-pick between branches": "app/src/lib/some-other-thing.ts",
+    }
+    decisions = [
+        {
+            "description": description,
+            "code_regions": [{"file_path": file_path}],
+        }
+        for description, file_path in anchors.items()
+    ]
+    tool_calls = [_ingest_call(decisions)]
+    tool_calls += [_ratify_call(decision_id) for decision_id in ("d1", "d2", "d3")]
+    ok, detail = run_e2e_flows.assert_flow_1(tool_calls)
+    assert not ok
+    assert "cherry-pick area" in detail
+
+
+def test_flow1_accepts_cherry_pick_tsx():
+    """The UI-layer cherry-pick.tsx also counts as a valid cherry-pick anchor."""
+    # Decision description -> bound file; the cherry-pick binding is the UI-layer file.
+    anchors = {
+        "High-signal notifications": "app/src/lib/stores/notifications-store.ts",
+        "Improved commit history": "app/src/lib/git/reorder.ts",
+        "Cherry-pick between branches": "app/src/ui/multi-commit-operation/cherry-pick.tsx",
+    }
+    decisions = [
+        {
+            "description": description,
+            "code_regions": [{"file_path": file_path}],
+        }
+        for description, file_path in anchors.items()
+    ]
+    tool_calls = [_ingest_call(decisions)]
+    tool_calls += [_ratify_call(decision_id) for decision_id in ("d1", "d2", "d3")]
+    ok, detail = run_e2e_flows.assert_flow_1(tool_calls)
+    assert ok, f"Flow 1 should accept cherry-pick.tsx; detail: {detail}"
+
+
+def test_flow1_fails_without_ratify():
+    """Valid bindings are not enough: with no ratify calls the asserter fails."""
+    seeded = _seed_calls("app/src/lib/git/reorder.ts")
+    # Keep only the calls whose tool name does not mention ratify.
+    tool_calls = [call for call in seeded if "ratify" not in call["name"]]
+    ok, detail = run_e2e_flows.assert_flow_1(tool_calls)
+    assert not ok
+    assert "ratify" in detail.lower()
diff --git a/tests/test_ephemeral_authoritative.py b/tests/test_ephemeral_authoritative.py
index fe628ffe..550fd813 100644
--- a/tests/test_ephemeral_authoritative.py
+++ b/tests/test_ephemeral_authoritative.py
@@ -40,6 +40,7 @@
   E21 — ungrounded → feature branch bind → reflected + ephemeral=True   [PASS]
   E22 — switch back to main: no stale ephemeral 'reflected' (→ drifted)  [PASS]
 """
+
 from __future__ import annotations
 
 import subprocess
@@ -55,7 +56,6 @@
 from handlers.link_commit import handle_link_commit, invalidate_sync_cache
 from handlers.resolve_compliance import handle_resolve_compliance
 
-
 # ── Helpers ───────────────────────────────────────────────────────────────────
 
 
@@ -98,7 +98,9 @@ def _merge(repo: Path, branch: str, *, squash: bool = False, no_ff: bool = False
         _git(repo, "merge", "--squash", branch)
         _git(repo, "-c", "commit.gpgsign=false", "commit", "-q", "-m", f"Squash-merge {branch}")
     elif no_ff:
-        _git(repo, "-c", "commit.gpgsign=false", "merge", "--no-ff", "-m", f"Merge {branch}", branch)
+        _git(
+            repo, "-c", "commit.gpgsign=false", "merge", "--no-ff", "-m", f"Merge {branch}", branch
+        )
     else:
         _git(repo, "-c", "commit.gpgsign=false", "merge", branch)
 
@@ -170,13 +172,18 @@ async def _ingest_and_bind(
     assert ingest.ingested, f"ingest failed: {ingest}"
     decision_id = ingest.created_decisions[0].decision_id
 
-    bind_resp = await handle_bind(ctx, [{
-        "decision_id": decision_id,
-        "file_path": file_path,
-        "symbol_name": symbol_name,
-        "start_line": start_line,
-        "end_line": end_line,
-    }])
+    bind_resp = await handle_bind(
+        ctx,
+        [
+            {
+                "decision_id": decision_id,
+                "file_path": file_path,
+                "symbol_name": symbol_name,
+                "start_line": start_line,
+                "end_line": end_line,
+            }
+        ],
+    )
     assert bind_resp.bindings, "no bind results"
     assert not bind_resp.bindings[0].error, f"bind error: {bind_resp.bindings[0].error}"
     return decision_id, bind_resp.bindings[0].region_id, bind_resp.bindings[0].content_hash
@@ -200,14 +207,16 @@ async def _resolve_verdict(
     return await handle_resolve_compliance(
         ctx,
         phase=phase,
-        verdicts=[{
-            "decision_id": decision_id,
-            "region_id": p.region_id,
-            "content_hash": p.content_hash,
-            "verdict": verdict,
-            "confidence": "high",
-            "explanation": "test",
-        }],
+        verdicts=[
+            {
+                "decision_id": decision_id,
+                "region_id": p.region_id,
+                "content_hash": p.content_hash,
+                "verdict": verdict,
+                "confidence": "high",
+                "explanation": "test",
+            }
+        ],
         flow_id=lc.flow_id,
     )
 
@@ -226,12 +235,15 @@ def _eph_repo(monkeypatch, tmp_path):
     monkeypatch.setenv("USE_REAL_LEDGER", "1")
     monkeypatch.setenv("SURREAL_URL", "memory://")
     repo = tmp_path / "repo"
-    _seed_repo(repo, {
-        "src/calc.py": """
+    _seed_repo(
+        repo,
+        {
+            "src/calc.py": """
             def rate(order_total: float) -> float:
                 return order_total * 0.1
         """,
-    })
+        },
+    )
     monkeypatch.setenv("REPO_PATH", str(repo))
     monkeypatch.setenv("BICAMERAL_AUTHORITATIVE_REF", "main")
     monkeypatch.chdir(repo)
@@ -259,13 +271,21 @@ async def test_e01_authoritative_branch_full_cycle(_eph_repo):
     # Ingest with code_regions so the binding exists before the internal link_commit.
     ingest = await handle_ingest(
         ctx,
-        _payload(repo, text="10% discount rule", intent="Apply 10% discount on all orders",
-                 code_regions=[{
-                     "file_path": "src/calc.py",
-                     "symbol": "rate",
-                     "start_line": 1, "end_line": 2,
-                     "type": "function", "purpose": "rate calc",
-                 }]),
+        _payload(
+            repo,
+            text="10% discount rule",
+            intent="Apply 10% discount on all orders",
+            code_regions=[
+                {
+                    "file_path": "src/calc.py",
+                    "symbol": "rate",
+                    "start_line": 1,
+                    "end_line": 2,
+                    "type": "function",
+                    "purpose": "rate calc",
+                }
+            ],
+        ),
     )
     assert ingest.ingested
     decision_id = ingest.created_decisions[0].decision_id
@@ -314,13 +334,21 @@ async def test_e02_feature_branch_full_cycle(_eph_repo):
     # Ingest on the feature branch — code_regions reference the original file on main.
     ingest = await handle_ingest(
         ctx,
-        _payload(repo, text="Pricing rate", intent="Apply rate to order total",
-                 code_regions=[{
-                     "file_path": "src/calc.py",
-                     "symbol": "rate",
-                     "start_line": 1, "end_line": 2,
-                     "type": "function", "purpose": "rate calc",
-                 }]),
+        _payload(
+            repo,
+            text="Pricing rate",
+            intent="Apply rate to order total",
+            code_regions=[
+                {
+                    "file_path": "src/calc.py",
+                    "symbol": "rate",
+                    "start_line": 1,
+                    "end_line": 2,
+                    "type": "function",
+                    "purpose": "rate calc",
+                }
+            ],
+        ),
     )
     assert ingest.ingested
     decision_id = ingest.created_decisions[0].decision_id
@@ -371,13 +399,21 @@ async def test_e03_ff_merge_verdict_survives(_eph_repo):
     ctx = BicameralContext.from_env()
     ingest = await handle_ingest(
         ctx,
-        _payload(repo, text="Pricing", intent="Apply rate",
-                 code_regions=[{
-                     "file_path": "src/calc.py",
-                     "symbol": "rate",
-                     "start_line": 1, "end_line": 2,
-                     "type": "function", "purpose": "rate",
-                 }]),
+        _payload(
+            repo,
+            text="Pricing",
+            intent="Apply rate",
+            code_regions=[
+                {
+                    "file_path": "src/calc.py",
+                    "symbol": "rate",
+                    "start_line": 1,
+                    "end_line": 2,
+                    "type": "function",
+                    "purpose": "rate",
+                }
+            ],
+        ),
     )
     assert ingest.ingested
     decision_id = ingest.created_decisions[0].decision_id
@@ -400,9 +436,7 @@ async def test_e03_ff_merge_verdict_survives(_eph_repo):
     )
     # No new pending compliance check for this decision (verdict already exists).
     new_pending = [p for p in lc_main.pending_compliance_checks if p.decision_id == decision_id]
-    assert not new_pending, (
-        f"Should not re-pend after FF merge with same hash, got: {new_pending}"
-    )
+    assert not new_pending, f"Should not re-pend after FF merge with same hash, got: {new_pending}"
 
 
 # ── E4: Squash merge → same content hash → reflected ──────────────────────────
@@ -429,13 +463,21 @@ async def test_e04_squash_merge_verdict_survives(_eph_repo):
     ctx = BicameralContext.from_env()
     ingest = await handle_ingest(
         ctx,
-        _payload(repo, text="Rate policy", intent="Set 18% rate",
-                 code_regions=[{
-                     "file_path": "src/calc.py",
-                     "symbol": "rate",
-                     "start_line": 1, "end_line": 2,
-                     "type": "function", "purpose": "rate",
-                 }]),
+        _payload(
+            repo,
+            text="Rate policy",
+            intent="Set 18% rate",
+            code_regions=[
+                {
+                    "file_path": "src/calc.py",
+                    "symbol": "rate",
+                    "start_line": 1,
+                    "end_line": 2,
+                    "type": "function",
+                    "purpose": "rate",
+                }
+            ],
+        ),
     )
     decision_id = ingest.created_decisions[0].decision_id
     lc = await handle_link_commit(ctx, "HEAD")
@@ -479,13 +521,21 @@ async def test_e05_content_change_becomes_drifted(_eph_repo):
 
     ingest = await handle_ingest(
         ctx,
-        _payload(repo, text="10% discount rule", intent="Apply 10% rate",
-                 code_regions=[{
-                     "file_path": "src/calc.py",
-                     "symbol": "rate",
-                     "start_line": 1, "end_line": 2,
-                     "type": "function", "purpose": "rate",
-                 }]),
+        _payload(
+            repo,
+            text="10% discount rule",
+            intent="Apply 10% rate",
+            code_regions=[
+                {
+                    "file_path": "src/calc.py",
+                    "symbol": "rate",
+                    "start_line": 1,
+                    "end_line": 2,
+                    "type": "function",
+                    "purpose": "rate",
+                }
+            ],
+        ),
     )
     decision_id = ingest.created_decisions[0].decision_id
     lc1 = await handle_link_commit(ctx, "HEAD")
@@ -546,13 +596,21 @@ async def test_e06_branch_switch_stale_not_cleared(_eph_repo):
     ctx = BicameralContext.from_env()
     ingest = await handle_ingest(
         ctx,
-        _payload(repo, text="Rate policy", intent="Apply 15% rate",
-                 code_regions=[{
-                     "file_path": "src/calc.py",
-                     "symbol": "rate",
-                     "start_line": 1, "end_line": 2,
-                     "type": "function", "purpose": "rate",
-                 }]),
+        _payload(
+            repo,
+            text="Rate policy",
+            intent="Apply 15% rate",
+            code_regions=[
+                {
+                    "file_path": "src/calc.py",
+                    "symbol": "rate",
+                    "start_line": 1,
+                    "end_line": 2,
+                    "type": "function",
+                    "purpose": "rate",
+                }
+            ],
+        ),
     )
     decision_id = ingest.created_decisions[0].decision_id
     lc_a = await handle_link_commit(ctx, "HEAD")
@@ -602,13 +660,21 @@ async def test_e07_feature_to_main_ephemeral_not_promoted(_eph_repo):
     ctx = BicameralContext.from_env()
     ingest = await handle_ingest(
         ctx,
-        _payload(repo, text="Rate", intent="11% rate",
-                 code_regions=[{
-                     "file_path": "src/calc.py",
-                     "symbol": "rate",
-                     "start_line": 1, "end_line": 2,
-                     "type": "function", "purpose": "rate",
-                 }]),
+        _payload(
+            repo,
+            text="Rate",
+            intent="11% rate",
+            code_regions=[
+                {
+                    "file_path": "src/calc.py",
+                    "symbol": "rate",
+                    "start_line": 1,
+                    "end_line": 2,
+                    "type": "function",
+                    "purpose": "rate",
+                }
+            ],
+        ),
     )
     decision_id = ingest.created_decisions[0].decision_id
     lc = await handle_link_commit(ctx, "HEAD")
@@ -657,13 +723,21 @@ async def test_e08_detached_head_non_ephemeral(_eph_repo):
     ctx = BicameralContext.from_env()
     ingest = await handle_ingest(
         ctx,
-        _payload(repo, text="Rate", intent="Rate policy",
-                 code_regions=[{
-                     "file_path": "src/calc.py",
-                     "symbol": "rate",
-                     "start_line": 1, "end_line": 2,
-                     "type": "function", "purpose": "rate",
-                 }]),
+        _payload(
+            repo,
+            text="Rate",
+            intent="Rate policy",
+            code_regions=[
+                {
+                    "file_path": "src/calc.py",
+                    "symbol": "rate",
+                    "start_line": 1,
+                    "end_line": 2,
+                    "type": "function",
+                    "purpose": "rate",
+                }
+            ],
+        ),
     )
     decision_id = ingest.created_decisions[0].decision_id
     lc = await handle_link_commit(ctx, "HEAD")
@@ -706,13 +780,21 @@ async def test_e09_process_restart_flag_lost_status_ok(_eph_repo):
     ctx = BicameralContext.from_env()
     ingest = await handle_ingest(
         ctx,
-        _payload(repo, text="Rate", intent="13% rate",
-                 code_regions=[{
-                     "file_path": "src/calc.py",
-                     "symbol": "rate",
-                     "start_line": 1, "end_line": 2,
-                     "type": "function", "purpose": "rate",
-                 }]),
+        _payload(
+            repo,
+            text="Rate",
+            intent="13% rate",
+            code_regions=[
+                {
+                    "file_path": "src/calc.py",
+                    "symbol": "rate",
+                    "start_line": 1,
+                    "end_line": 2,
+                    "type": "function",
+                    "purpose": "rate",
+                }
+            ],
+        ),
     )
     decision_id = ingest.created_decisions[0].decision_id
     lc = await handle_link_commit(ctx, "HEAD")
@@ -731,22 +813,22 @@ async def test_e09_process_restart_flag_lost_status_ok(_eph_repo):
     rc = await handle_resolve_compliance(
         ctx2,
         phase="ingest",
-        verdicts=[{
-            "decision_id": decision_id,
-            "region_id": pending[0].region_id,
-            "content_hash": pending[0].content_hash,
-            "verdict": "compliant",
-            "confidence": "high",
-            "explanation": "post-restart",
-        }],
+        verdicts=[
+            {
+                "decision_id": decision_id,
+                "region_id": pending[0].region_id,
+                "content_hash": pending[0].content_hash,
+                "verdict": "compliant",
+                "confidence": "high",
+                "explanation": "post-restart",
+            }
+        ],
         # No flow_id — simulating process restart
     )
     assert rc.accepted, f"resolve rejected post-restart: {rc.rejected}"
 
     status = await _get_decision_status(ctx2, decision_id)
-    assert status == "reflected", (
-        f"Status must be reflected after restart, got {status}"
-    )
+    assert status == "reflected", f"Status must be reflected after restart, got {status}"
 
     checks = await _get_compliance_checks(ctx2, decision_id)
     assert checks
@@ -778,29 +860,41 @@ async def test_e10_idempotent_resolve_compliance(_eph_repo):
 
     ingest = await handle_ingest(
         ctx,
-        _payload(repo, text="Discount rate", intent="Apply rate",
-                 code_regions=[{
-                     "file_path": "src/calc.py",
-                     "symbol": "rate",
-                     "start_line": 1, "end_line": 2,
-                     "type": "function", "purpose": "rate",
-                 }]),
+        _payload(
+            repo,
+            text="Discount rate",
+            intent="Apply rate",
+            code_regions=[
+                {
+                    "file_path": "src/calc.py",
+                    "symbol": "rate",
+                    "start_line": 1,
+                    "end_line": 2,
+                    "type": "function",
+                    "purpose": "rate",
+                }
+            ],
+        ),
     )
     decision_id = ingest.created_decisions[0].decision_id
     lc = await handle_link_commit(ctx, "HEAD")
     pending = [p for p in lc.pending_compliance_checks if p.decision_id == decision_id]
     assert pending
 
-    verdict_payload = [{
-        "decision_id": decision_id,
-        "region_id": pending[0].region_id,
-        "content_hash": pending[0].content_hash,
-        "verdict": "compliant",
-        "confidence": "high",
-        "explanation": "first call",
-    }]
+    verdict_payload = [
+        {
+            "decision_id": decision_id,
+            "region_id": pending[0].region_id,
+            "content_hash": pending[0].content_hash,
+            "verdict": "compliant",
+            "confidence": "high",
+            "explanation": "first call",
+        }
+    ]
 
-    rc1 = await handle_resolve_compliance(ctx, phase="ingest", verdicts=verdict_payload, flow_id=lc.flow_id)
+    rc1 = await handle_resolve_compliance(
+        ctx, phase="ingest", verdicts=verdict_payload, flow_id=lc.flow_id
+    )
     assert rc1.accepted
 
     # Second call with same payload — must succeed silently.
@@ -842,13 +936,21 @@ async def test_e11_flow_id_mismatch_ephemeral_false_status_ok(_eph_repo):
     ctx = BicameralContext.from_env()
     ingest = await handle_ingest(
         ctx,
-        _payload(repo, text="Rate 14%", intent="Apply 14% rate",
-                 code_regions=[{
-                     "file_path": "src/calc.py",
-                     "symbol": "rate",
-                     "start_line": 1, "end_line": 2,
-                     "type": "function", "purpose": "rate",
-                 }]),
+        _payload(
+            repo,
+            text="Rate 14%",
+            intent="Apply 14% rate",
+            code_regions=[
+                {
+                    "file_path": "src/calc.py",
+                    "symbol": "rate",
+                    "start_line": 1,
+                    "end_line": 2,
+                    "type": "function",
+                    "purpose": "rate",
+                }
+            ],
+        ),
     )
     decision_id = ingest.created_decisions[0].decision_id
     lc = await handle_link_commit(ctx, "HEAD")
@@ -861,14 +963,16 @@ async def test_e11_flow_id_mismatch_ephemeral_false_status_ok(_eph_repo):
     rc = await handle_resolve_compliance(
         ctx,
         phase="ingest",
-        verdicts=[{
-            "decision_id": decision_id,
-            "region_id": pending[0].region_id,
-            "content_hash": pending[0].content_hash,
-            "verdict": "compliant",
-            "confidence": "high",
-            "explanation": "stale flow",
-        }],
+        verdicts=[
+            {
+                "decision_id": decision_id,
+                "region_id": pending[0].region_id,
+                "content_hash": pending[0].content_hash,
+                "verdict": "compliant",
+                "confidence": "high",
+                "explanation": "stale flow",
+            }
+        ],
         flow_id=stale_flow_id,
     )
     assert rc.accepted, f"Expected accepted despite flow_id mismatch, got: {rc.rejected}"
@@ -919,13 +1023,21 @@ async def test_e12_feature_branch_reflected_drift_not_detected(_eph_repo):
     # calc.py IS in changed_files → pending check surfaced → we can verify it.
     ingest = await handle_ingest(
         ctx,
-        _payload(repo, text="Rate 20%", intent="Rate policy",
-                 code_regions=[{
-                     "file_path": "src/calc.py",
-                     "symbol": "rate",
-                     "start_line": 1, "end_line": 2,
-                     "type": "function", "purpose": "rate",
-                 }]),
+        _payload(
+            repo,
+            text="Rate 20%",
+            intent="Rate policy",
+            code_regions=[
+                {
+                    "file_path": "src/calc.py",
+                    "symbol": "rate",
+                    "start_line": 1,
+                    "end_line": 2,
+                    "type": "function",
+                    "purpose": "rate",
+                }
+            ],
+        ),
     )
     decision_id = ingest.created_decisions[0].decision_id
     lc1 = await handle_link_commit(ctx, "HEAD")
@@ -989,13 +1101,21 @@ async def test_e13_rebase_same_hash_verdict_survives(_eph_repo):
     ctx = BicameralContext.from_env()
     ingest = await handle_ingest(
         ctx,
-        _payload(repo, text="Tax calc", intent="Compute 7% tax",
-                 code_regions=[{
-                     "file_path": "src/calc.py",
-                     "symbol": "tax",
-                     "start_line": 4, "end_line": 5,
-                     "type": "function", "purpose": "tax",
-                 }]),
+        _payload(
+            repo,
+            text="Tax calc",
+            intent="Compute 7% tax",
+            code_regions=[
+                {
+                    "file_path": "src/calc.py",
+                    "symbol": "tax",
+                    "start_line": 4,
+                    "end_line": 5,
+                    "type": "function",
+                    "purpose": "tax",
+                }
+            ],
+        ),
     )
     decision_id = ingest.created_decisions[0].decision_id
     lc1 = await handle_link_commit(ctx, "HEAD")
@@ -1062,13 +1182,21 @@ async def test_e14_deleted_branch_verdict_survives(_eph_repo):
     ctx = BicameralContext.from_env()
     ingest = await handle_ingest(
         ctx,
-        _payload(repo, text="Rate 16%", intent="16% rate",
-                 code_regions=[{
-                     "file_path": "src/calc.py",
-                     "symbol": "rate",
-                     "start_line": 1, "end_line": 2,
-                     "type": "function", "purpose": "rate",
-                 }]),
+        _payload(
+            repo,
+            text="Rate 16%",
+            intent="16% rate",
+            code_regions=[
+                {
+                    "file_path": "src/calc.py",
+                    "symbol": "rate",
+                    "start_line": 1,
+                    "end_line": 2,
+                    "type": "function",
+                    "purpose": "rate",
+                }
+            ],
+        ),
     )
     decision_id = ingest.created_decisions[0].decision_id
     lc = await handle_link_commit(ctx, "HEAD")
@@ -1136,13 +1264,21 @@ async def test_e15_custom_authoritative_ref_non_ephemeral(_eph_repo, monkeypatch
 
     ingest = await handle_ingest(
         ctx,
-        _payload(repo, text="Rate 19%", intent="19% rate on develop",
-                 code_regions=[{
-                     "file_path": "src/calc.py",
-                     "symbol": "rate",
-                     "start_line": 1, "end_line": 2,
-                     "type": "function", "purpose": "rate",
-                 }]),
+        _payload(
+            repo,
+            text="Rate 19%",
+            intent="19% rate on develop",
+            code_regions=[
+                {
+                    "file_path": "src/calc.py",
+                    "symbol": "rate",
+                    "start_line": 1,
+                    "end_line": 2,
+                    "type": "function",
+                    "purpose": "rate",
+                }
+            ],
+        ),
     )
     decision_id = ingest.created_decisions[0].decision_id
     lc = await handle_link_commit(ctx, "HEAD")
@@ -1191,25 +1327,29 @@ async def test_e16_resolve_compliance_without_link_commit(_eph_repo):
     ctx = BicameralContext.from_env()
 
     decision_id, region_id, bind_hash = await _ingest_and_bind(
-        ctx, repo,
+        ctx,
+        repo,
         intent="Direct resolve no link_commit",
         file_path="src/calc.py",
         symbol_name="rate",
-        start_line=1, end_line=2,
+        start_line=1,
+        end_line=2,
     )
 
     # Call resolve_compliance directly (no link_commit, no flow_id).
     rc = await handle_resolve_compliance(
         ctx,
         phase="ingest",
-        verdicts=[{
-            "decision_id": decision_id,
-            "region_id": region_id,
-            "content_hash": bind_hash,
-            "verdict": "compliant",
-            "confidence": "high",
-            "explanation": "direct resolve",
-        }],
+        verdicts=[
+            {
+                "decision_id": decision_id,
+                "region_id": region_id,
+                "content_hash": bind_hash,
+                "verdict": "compliant",
+                "confidence": "high",
+                "explanation": "direct resolve",
+            }
+        ],
     )
     assert rc.accepted, f"Direct resolve rejected: {rc.rejected}"
 
@@ -1243,13 +1383,21 @@ async def test_e17_ephemeral_first_write_wins_flag_stuck(_eph_repo):
     ctx = BicameralContext.from_env()
     ingest = await handle_ingest(
         ctx,
-        _payload(repo, text="Rate 17%", intent="17% rate",
-                 code_regions=[{
-                     "file_path": "src/calc.py",
-                     "symbol": "rate",
-                     "start_line": 1, "end_line": 2,
-                     "type": "function", "purpose": "rate",
-                 }]),
+        _payload(
+            repo,
+            text="Rate 17%",
+            intent="17% rate",
+            code_regions=[
+                {
+                    "file_path": "src/calc.py",
+                    "symbol": "rate",
+                    "start_line": 1,
+                    "end_line": 2,
+                    "type": "function",
+                    "purpose": "rate",
+                }
+            ],
+        ),
     )
     decision_id = ingest.created_decisions[0].decision_id
     lc_feat = await handle_link_commit(ctx, "HEAD")
@@ -1275,14 +1423,16 @@ async def test_e17_ephemeral_first_write_wins_flag_stuck(_eph_repo):
     rc_main = await handle_resolve_compliance(
         ctx,
         phase="drift",
-        verdicts=[{
-            "decision_id": decision_id,
-            "region_id": pending[0].region_id,
-            "content_hash": feature_hash,
-            "verdict": "compliant",
-            "confidence": "high",
-            "explanation": "main confirmation",
-        }],
+        verdicts=[
+            {
+                "decision_id": decision_id,
+                "region_id": pending[0].region_id,
+                "content_hash": feature_hash,
+                "verdict": "compliant",
+                "confidence": "high",
+                "explanation": "main confirmation",
+            }
+        ],
         # No flow_id — ctx is on main, no pending_ephemeral in sync_state
     )
     assert rc_main.accepted
@@ -1318,9 +1468,7 @@ async def test_e18_bind_branch_local_file(_eph_repo):
     repo = _eph_repo
 
     _checkout(repo, "feat/new-module", create=True)
-    (repo / "src/new_module.py").write_text(
-        "def compute(x: int) -> int:\n    return x * 2\n"
-    )
+    (repo / "src/new_module.py").write_text("def compute(x: int) -> int:\n    return x * 2\n")
     _commit(repo, "add new_module.py (branch-only file)")
 
     ctx = BicameralContext.from_env()
@@ -1332,19 +1480,22 @@ async def test_e18_bind_branch_local_file(_eph_repo):
     assert ingest.ingested
     decision_id = ingest.created_decisions[0].decision_id
 
-    bind_resp = await handle_bind(ctx, [{
-        "decision_id": decision_id,
-        "file_path": "src/new_module.py",
-        "symbol_name": "compute",
-        "start_line": 1,
-        "end_line": 2,
-    }])
+    bind_resp = await handle_bind(
+        ctx,
+        [
+            {
+                "decision_id": decision_id,
+                "file_path": "src/new_module.py",
+                "symbol_name": "compute",
+                "start_line": 1,
+                "end_line": 2,
+            }
+        ],
+    )
 
     assert bind_resp.bindings, "no bind results"
     b = bind_resp.bindings[0]
-    assert not b.error, (
-        f"bind must succeed for a branch-local file; got error: {b.error}"
-    )
+    assert not b.error, f"bind must succeed for a branch-local file; got error: {b.error}"
     assert b.content_hash, "content_hash must be non-empty after successful bind"
 
 
@@ -1395,13 +1546,18 @@ async def test_e19_bind_modified_function_uses_branch_hash(_eph_repo):
     assert ingest.ingested
     decision_id = ingest.created_decisions[0].decision_id
 
-    bind_resp = await handle_bind(ctx, [{
-        "decision_id": decision_id,
-        "file_path": "src/calc.py",
-        "symbol_name": "rate",
-        "start_line": 1,
-        "end_line": 2,
-    }])
+    bind_resp = await handle_bind(
+        ctx,
+        [
+            {
+                "decision_id": decision_id,
+                "file_path": "src/calc.py",
+                "symbol_name": "rate",
+                "start_line": 1,
+                "end_line": 2,
+            }
+        ],
+    )
 
     assert bind_resp.bindings, "no bind results"
     b = bind_resp.bindings[0]
@@ -1462,13 +1618,18 @@ async def test_e20_bind_link_commit_hash_consistency_no_phantom_drift(_eph_repo)
     assert ingest.ingested
     decision_id = ingest.created_decisions[0].decision_id
 
-    bind_resp = await handle_bind(ctx, [{
-        "decision_id": decision_id,
-        "file_path": "src/calc.py",
-        "symbol_name": "rate",
-        "start_line": 1,
-        "end_line": 2,
-    }])
+    bind_resp = await handle_bind(
+        ctx,
+        [
+            {
+                "decision_id": decision_id,
+                "file_path": "src/calc.py",
+                "symbol_name": "rate",
+                "start_line": 1,
+                "end_line": 2,
+            }
+        ],
+    )
     assert bind_resp.bindings and not bind_resp.bindings[0].error
     bind_hash = bind_resp.bindings[0].content_hash
     assert bind_hash, "bind must return content_hash"
@@ -1482,7 +1643,7 @@ async def test_e20_bind_link_commit_hash_consistency_no_phantom_drift(_eph_repo)
     # First link_commit: surfaces pending check at H_branch.
     lc1 = await handle_link_commit(ctx, "HEAD")
     pending = [p for p in lc1.pending_compliance_checks if p.decision_id == decision_id]
-    assert pending, f"link_commit must surface pending check for the bound decision"
+    assert pending, "link_commit must surface pending check for the bound decision"
     assert pending[0].content_hash == bind_hash, (
         f"pending_check.content_hash ({pending[0].content_hash[:8]}) must equal "
         f"bind_result.content_hash ({bind_hash[:8]}) — hash consistency invariant"
@@ -1492,14 +1653,16 @@ async def test_e20_bind_link_commit_hash_consistency_no_phantom_drift(_eph_repo)
     rc = await handle_resolve_compliance(
         ctx,
         phase="ingest",
-        verdicts=[{
-            "decision_id": decision_id,
-            "region_id": pending[0].region_id,
-            "content_hash": pending[0].content_hash,
-            "verdict": "compliant",
-            "confidence": "high",
-            "explanation": "branch content verified",
-        }],
+        verdicts=[
+            {
+                "decision_id": decision_id,
+                "region_id": pending[0].region_id,
+                "content_hash": pending[0].content_hash,
+                "verdict": "compliant",
+                "confidence": "high",
+                "explanation": "branch content verified",
+            }
+        ],
         flow_id=lc1.flow_id,
     )
     assert rc.accepted, f"resolve_compliance rejected: {rc.rejected}"
@@ -1565,8 +1728,7 @@ async def test_e21_ungrounded_feature_bind_reflected_ephemeral(_eph_repo):
     # Engineer creates feature branch and writes the implementation.
     _checkout(repo, "feat/cap-discount", create=True)
     (repo / "src/calc.py").write_text(
-        "def rate(order_total: float) -> float:\n"
-        "    return min(order_total * 0.30, order_total)\n"
+        "def rate(order_total: float) -> float:\n    return min(order_total * 0.30, order_total)\n"
     )
     _commit(repo, "cap discount at 30% (feat/cap-discount)")
 
@@ -1579,13 +1741,18 @@ async def test_e21_ungrounded_feature_bind_reflected_ephemeral(_eph_repo):
     )
 
     # Bind to the implementation on the feature branch.
-    bind_resp = await handle_bind(ctx_feat, [{
-        "decision_id": decision_id,
-        "file_path": "src/calc.py",
-        "symbol_name": "rate",
-        "start_line": 1,
-        "end_line": 2,
-    }])
+    bind_resp = await handle_bind(
+        ctx_feat,
+        [
+            {
+                "decision_id": decision_id,
+                "file_path": "src/calc.py",
+                "symbol_name": "rate",
+                "start_line": 1,
+                "end_line": 2,
+            }
+        ],
+    )
     assert bind_resp.bindings and not bind_resp.bindings[0].error, (
         f"bind must succeed on feature branch: "
         f"{bind_resp.bindings[0].error if bind_resp.bindings else 'no results'}"
@@ -1607,14 +1774,16 @@ async def test_e21_ungrounded_feature_bind_reflected_ephemeral(_eph_repo):
     rc = await handle_resolve_compliance(
         ctx_feat,
         phase="ingest",
-        verdicts=[{
-            "decision_id": decision_id,
-            "region_id": pending[0].region_id,
-            "content_hash": pending[0].content_hash,
-            "verdict": "compliant",
-            "confidence": "high",
-            "explanation": "cap implementation verified",
-        }],
+        verdicts=[
+            {
+                "decision_id": decision_id,
+                "region_id": pending[0].region_id,
+                "content_hash": pending[0].content_hash,
+                "verdict": "compliant",
+                "confidence": "high",
+                "explanation": "cap implementation verified",
+            }
+        ],
         flow_id=lc.flow_id,
     )
     assert rc.accepted, f"resolve_compliance rejected: {rc.rejected}"
@@ -1675,21 +1844,25 @@ async def test_e22_switch_to_main_no_stale_reflected(_eph_repo):
     # Feature branch: implement + bind + resolve → reflected, ephemeral=True.
     _checkout(repo, "feat/cap-v2", create=True)
     (repo / "src/calc.py").write_text(
-        "def rate(order_total: float) -> float:\n"
-        "    return min(order_total * 0.30, order_total)\n"
+        "def rate(order_total: float) -> float:\n    return min(order_total * 0.30, order_total)\n"
     )
     _commit(repo, "cap at 30%")
 
     # Fresh ctx on the feature branch.
     ctx_feat = BicameralContext.from_env()
 
-    bind_resp = await handle_bind(ctx_feat, [{
-        "decision_id": decision_id,
-        "file_path": "src/calc.py",
-        "symbol_name": "rate",
-        "start_line": 1,
-        "end_line": 2,
-    }])
+    bind_resp = await handle_bind(
+        ctx_feat,
+        [
+            {
+                "decision_id": decision_id,
+                "file_path": "src/calc.py",
+                "symbol_name": "rate",
+                "start_line": 1,
+                "end_line": 2,
+            }
+        ],
+    )
     assert bind_resp.bindings and not bind_resp.bindings[0].error
 
     lc_feat = await handle_link_commit(ctx_feat, "HEAD")
@@ -1700,14 +1873,16 @@ async def test_e22_switch_to_main_no_stale_reflected(_eph_repo):
     rc = await handle_resolve_compliance(
         ctx_feat,
         phase="ingest",
-        verdicts=[{
-            "decision_id": decision_id,
-            "region_id": pending[0].region_id,
-            "content_hash": pending[0].content_hash,
-            "verdict": "compliant",
-            "confidence": "high",
-            "explanation": "verified on branch",
-        }],
+        verdicts=[
+            {
+                "decision_id": decision_id,
+                "region_id": pending[0].region_id,
+                "content_hash": pending[0].content_hash,
+                "verdict": "compliant",
+                "confidence": "high",
+                "explanation": "verified on branch",
+            }
+        ],
         flow_id=lc_feat.flow_id,
     )
     assert rc.accepted
diff --git a/tests/test_extract_headless.py b/tests/test_extract_headless.py
index a7d5878e..9916be67 100644
--- a/tests/test_extract_headless.py
+++ b/tests/test_extract_headless.py
@@ -8,6 +8,7 @@
 Network-dependent end-to-end tests live in CI only, gated on
 ANTHROPIC_API_KEY being present.
 """
+
 from __future__ import annotations
 
 import json
@@ -101,9 +102,7 @@ def test_cache_hit_returns_without_auth(monkeypatch):
 
     monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
     try:
-        result = extract_from_current_skill(
-            transcript, source_ref="test", skill_md_path=skill_md
-        )
+        result = extract_from_current_skill(transcript, source_ref="test", skill_md_path=skill_md)
     finally:
         cache_file.unlink(missing_ok=True)
 
diff --git a/tests/test_extraction_metrics.py b/tests/test_extraction_metrics.py
index fb9a9d9b..56028090 100644
--- a/tests/test_extraction_metrics.py
+++ b/tests/test_extraction_metrics.py
@@ -3,6 +3,7 @@
 Exercises the fuzzy matching, 1:1 assignment, and aggregate math with
 synthetic extracted/fixture pairs. No network, no fixture files on disk.
 """
+
 from __future__ import annotations
 
 import sys
@@ -33,14 +34,18 @@ def test_skipped_when_fixture_absent():
 
 
 def test_perfect_match_is_p1_r1_f1_1():
-    fixture = _f([
-        "Add 12-second timeout to payment authorize calls",
-        "Emit payment.timeout event via EventBus",
-    ])
-    extracted = _e([
-        "Add 12-second timeout to payment authorize calls",
-        "Emit payment.timeout event via EventBus",
-    ])
+    fixture = _f(
+        [
+            "Add 12-second timeout to payment authorize calls",
+            "Emit payment.timeout event via EventBus",
+        ]
+    )
+    extracted = _e(
+        [
+            "Add 12-second timeout to payment authorize calls",
+            "Emit payment.timeout event via EventBus",
+        ]
+    )
     out = compute_extraction_metrics(extracted, fixture, matcher="rapidfuzz")
     assert out["skipped"] is False
     assert out["true_positives"] == 2
@@ -74,16 +79,20 @@ def test_low_similarity_is_false_positive_and_false_negative():
 
 
 def test_partial_match_mixed_precision_and_recall():
-    fixture = _f([
-        "Add timeout to authorize calls",
-        "Emit timeout event via EventBus",
-        "Drop garbage provider responses",
-    ])
-    extracted = _e([
-        "Add timeout to authorize calls",   # TP
-        "Drop garbage provider responses",  # TP
-        "Use circuit breaker for rate limiting",  # FP
-    ])
+    fixture = _f(
+        [
+            "Add timeout to authorize calls",
+            "Emit timeout event via EventBus",
+            "Drop garbage provider responses",
+        ]
+    )
+    extracted = _e(
+        [
+            "Add timeout to authorize calls",  # TP
+            "Drop garbage provider responses",  # TP
+            "Use circuit breaker for rate limiting",  # FP
+        ]
+    )
     out = compute_extraction_metrics(extracted, fixture, matcher="rapidfuzz")
     assert out["true_positives"] == 2
     assert out["false_positives"] == 1
@@ -95,10 +104,12 @@ def test_partial_match_mixed_precision_and_recall():
 def test_one_to_one_matching_prevents_double_counting():
     """If two extracted items both look like one fixture item, only one wins."""
     fixture = _f(["Add 12-second timeout to payment authorize calls"])
-    extracted = _e([
-        "Add 12-second timeout to payment authorize calls",
-        "Add a 12-second timeout to authorize calls in payments",  # very similar
-    ])
+    extracted = _e(
+        [
+            "Add 12-second timeout to payment authorize calls",
+            "Add a 12-second timeout to authorize calls in payments",  # very similar
+        ]
+    )
     out = compute_extraction_metrics(extracted, fixture, matcher="rapidfuzz")
     assert out["true_positives"] == 1  # not 2
     assert out["false_positives"] == 1  # the second one doesn't match anything new
@@ -109,13 +120,21 @@ def test_aggregate_sums_across_scored_and_ignores_skipped():
     per_transcript = [
         {
             "skipped": False,
-            "true_positives": 3, "false_positives": 1, "false_negatives": 2,
-            "precision": 0.75, "recall": 0.6, "f1": 0.667,
+            "true_positives": 3,
+            "false_positives": 1,
+            "false_negatives": 2,
+            "precision": 0.75,
+            "recall": 0.6,
+            "f1": 0.667,
         },
         {
             "skipped": False,
-            "true_positives": 5, "false_positives": 0, "false_negatives": 1,
-            "precision": 1.0, "recall": 0.833, "f1": 0.909,
+            "true_positives": 5,
+            "false_positives": 0,
+            "false_negatives": 1,
+            "precision": 1.0,
+            "recall": 0.833,
+            "f1": 0.909,
         },
         {"skipped": True, "reason": "no fixture"},
     ]
@@ -126,8 +145,8 @@ def test_aggregate_sums_across_scored_and_ignores_skipped():
     assert out["false_positives"] == 1
     assert out["false_negatives"] == 3
     # precision = 8/9, recall = 8/11
-    assert abs(out["precision"] - 8/9) < 1e-3
-    assert abs(out["recall"] - 8/11) < 1e-3
+    assert abs(out["precision"] - 8 / 9) < 1e-3
+    assert abs(out["recall"] - 8 / 11) < 1e-3
 
 
 def test_aggregate_all_skipped_returns_skipped():
@@ -153,18 +172,21 @@ def test_empty_extraction_and_empty_fixture_gives_zero_not_error():
 
 def test_pick_matcher_auto_picks_llm_when_key_present(monkeypatch):
     from _extraction_metrics import _pick_matcher
+
     monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-fake")
     assert _pick_matcher("auto") == "llm"
 
 
 def test_pick_matcher_auto_falls_back_to_rapidfuzz(monkeypatch):
     from _extraction_metrics import _pick_matcher
+
     monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
     assert _pick_matcher("auto") == "rapidfuzz"
 
 
 def test_pick_matcher_explicit_overrides_env(monkeypatch):
     from _extraction_metrics import _pick_matcher
+
     monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-fake")
     assert _pick_matcher("rapidfuzz") == "rapidfuzz"
 
@@ -227,8 +249,8 @@ def test_llm_match_parses_valid_response_into_pairs():
 def test_compute_extraction_metrics_dispatches_to_llm(monkeypatch):
     """When matcher='llm', compute_extraction_metrics calls llm_match
     instead of rapidfuzz. We stub llm_match so no network is needed."""
-    import _extraction_metrics
     import _extraction_matcher
+    import _extraction_metrics
 
     actual = _e(["X", "Y", "Z"])
     fixture = _f(["P", "Q"])
diff --git a/tests/test_flow4_ledger_validation.py b/tests/test_flow4_ledger_validation.py
new file mode 100644
index 00000000..22d1b226
--- /dev/null
+++ b/tests/test_flow4_ledger_validation.py
@@ -0,0 +1,149 @@
+"""Functionality tests for Flow 4 path-X-(b) ledger validation.
+
+Tests the pure helper `count_agent_session_decisions` from
+`tests/e2e/_ledger_helpers.py` and the merge logic that
+`_validate_flow4_via_ledger` applies to a `FlowResult`.
+"""
+
+from __future__ import annotations
+
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(REPO_ROOT / "tests" / "e2e"))
+
+from _ledger_helpers import count_agent_session_decisions  # noqa: E402
+
+
+@dataclass
+class FlowResultStub:
+    flow_id: str  # flow label; the tests in this file use "Flow 4"
+    passed: bool  # verdict flag; `_merge` may upgrade False to True
+    verdict_reason: str  # reason text; replaced only when `_merge` upgrades the verdict
+    body: str  # report text; `_merge` appends a "Ledger validation" section
+
+
+def test_counts_zero_when_no_agent_session_decisions():
+    """A ledger holding only manual/transcript rows yields a count of 0."""
+    rows = [
+        {"decision_id": "d1", "source_type": "manual"},
+        {"decision_id": "d2", "source_type": "transcript"},
+    ]
+    tally = count_agent_session_decisions({"decisions": rows})
+    assert tally == 0
+
+
+def test_counts_only_agent_session_decisions():
+    """Only the three agent_session rows among the eight are counted."""
+    kinds = [
+        "agent_session",
+        "manual",
+        "agent_session",
+        "transcript",
+        "manual",
+        "manual",
+        "manual",
+        "agent_session",
+    ]
+    rows = [{"decision_id": f"d{i}", "source_type": k} for i, k in enumerate(kinds, 1)]
+    assert count_agent_session_decisions({"decisions": rows}) == 3
+
+
+def test_handles_missing_source_type_field():
+    """Rows whose source_type is absent or None are not counted."""
+    legacy_row = {"decision_id": "d1"}  # legacy row, no source_type
+    agent_row = {"decision_id": "d2", "source_type": "agent_session"}
+    null_row = {"decision_id": "d3", "source_type": None}
+    ledger = {"decisions": [legacy_row, agent_row, null_row]}
+    tally = count_agent_session_decisions(ledger)
+    # Only the explicit agent_session row contributes.
+    assert tally == 1
+
+
+def test_handles_error_snapshot():
+    # An error-only snapshot carries no "decisions" key; the helper must report None.
+    assert count_agent_session_decisions({"error": "connection failed"}) is None
+
+
+def _merge(flow: FlowResultStub, snapshot: dict) -> None:
+    """Apply the ledger-validation merge rules to a stub FlowResult. This
+    mirrors `_validate_flow4_via_ledger` so the merge invariants can be
+    unit tested without importing the full harness module."""
+    header = "\n— Ledger validation —\n"
+    found = count_agent_session_decisions(snapshot)
+    if found is None:
+        # Ledger query failed: annotate the body, leave the verdict alone.
+        flow.body += f"{header}INCONCLUSIVE: ledger query failed: {snapshot.get('error')}\n"
+        return
+    if found > 0 and not flow.passed:
+        # Ledger evidence overrides an in-stream asserter failure.
+        flow.passed = True
+        flow.verdict_reason = (
+            f"in-stream asserter FAIL but SessionEnd subprocess effect "
+            f"observed in ledger ({found} agent_session decisions, path-X-b)"
+        )
+    if found > 0:
+        note = (
+            f"PASS: {found} decision(s) with source_type='agent_session' "
+            f"present in ledger after harness completion (path-X-b: SessionEnd "
+            f"subprocess and/or in-session capture-corrections wrote them).\n"
+        )
+    else:
+        note = (
+            "path-X-b absent: zero decisions with source_type='agent_session' "
+            "after harness completion. SessionEnd subprocess either did not "
+            "fire, did not detect uningested corrections, or failed silently.\n"
+        )
+    flow.body += header + note
+
+
+def test_validate_merges_pass_into_flow4_result():
+    """A failing asserter verdict is upgraded to PASS when the ledger
+    contains agent_session decisions."""
+    agent_rows = [
+        {"decision_id": "d1", "source_type": "agent_session"},
+        {"decision_id": "d2", "source_type": "agent_session"},
+    ]
+    result = FlowResultStub(
+        flow_id="Flow 4",
+        passed=False,
+        verdict_reason="initial",
+        body="initial body",
+    )
+    _merge(result, {"decisions": agent_rows})
+    # The verdict flips, and both the reason and the body record the ledger evidence.
+    assert result.passed is True
+    assert "SessionEnd subprocess effect observed" in result.verdict_reason
+    assert "agent_session" in result.body
+
+
+def test_validate_preserves_existing_pass():
+    """Asserter PASS plus agent_session rows: verdict and reason stay, a note is appended."""
+    ledger = {"decisions": [{"decision_id": "d1", "source_type": "agent_session"}]}
+    result = FlowResultStub(
+        flow_id="Flow 4",
+        passed=True,
+        verdict_reason="initial",
+        body="initial body",
+    )
+    _merge(result, ledger)
+    assert result.passed is True
+    assert result.verdict_reason == "initial"
+    assert "Ledger validation" in result.body
+
+
+def test_validate_handles_inconclusive_ledger():
+    """A failed ledger query only annotates the body as INCONCLUSIVE."""
+    result = FlowResultStub(
+        flow_id="Flow 4",
+        passed=False,
+        verdict_reason="initial",
+        body="initial body",
+    )
+    failed_query = {"error": "connection failed"}
+    _merge(result, failed_query)
+    assert result.passed is False
+    assert result.verdict_reason == "initial"
+    assert "INCONCLUSIVE" in result.body
diff --git a/tests/test_link_commit_grounding.py b/tests/test_link_commit_grounding.py
index f96deba4..2d2aa9a1 100644
--- a/tests/test_link_commit_grounding.py
+++ b/tests/test_link_commit_grounding.py
@@ -6,6 +6,7 @@
 2. test_pending_grounding_checks_symbol_not_found — ingest a decision with a binding,
    then simulate symbol disappearing → link_commit emits grounding check for that decision
 """
+
 from __future__ import annotations
 
 import subprocess
@@ -19,7 +20,6 @@
 from handlers.ingest import handle_ingest
 from handlers.link_commit import handle_link_commit
 
-
 # ── Helpers ───────────────────────────────────────────────────────────────────
 
 
@@ -178,6 +178,7 @@ async def test_pending_grounding_checks_symbol_not_found(_isolated_ledger):
 
     # Invalidate the within-call sync cache so the handler runs a real sweep
     from handlers.link_commit import invalidate_sync_cache
+
     invalidate_sync_cache(ctx)
 
     # Simulate the old symbol (fetch_user) not being found in the new commit
diff --git a/tests/test_local_counters.py b/tests/test_local_counters.py
index 1b804204..fc7c8d25 100644
--- a/tests/test_local_counters.py
+++ b/tests/test_local_counters.py
@@ -18,7 +18,9 @@ def test_increment_creates_counter_file(tmp_path: Path, monkeypatch: pytest.Monk
     monkeypatch.setenv("HOME", str(tmp_path))
     monkeypatch.setenv("USERPROFILE", str(tmp_path))
     import importlib
+
     import local_counters
+
     importlib.reload(local_counters)
 
     local_counters.increment("bicameral-ingest")
@@ -33,7 +35,9 @@ def test_increment_appends(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> N
     monkeypatch.setenv("HOME", str(tmp_path))
     monkeypatch.setenv("USERPROFILE", str(tmp_path))
     import importlib
+
     import local_counters
+
     importlib.reload(local_counters)
 
     for _ in range(50):
@@ -46,7 +50,9 @@ def test_read_counters_aggregates(tmp_path: Path, monkeypatch: pytest.MonkeyPatc
     monkeypatch.setenv("HOME", str(tmp_path))
     monkeypatch.setenv("USERPROFILE", str(tmp_path))
     import importlib
+
     import local_counters
+
     importlib.reload(local_counters)
 
     for _ in range(3):
@@ -63,7 +69,9 @@ def test_no_network_calls(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> No
     monkeypatch.setenv("HOME", str(tmp_path))
     monkeypatch.setenv("USERPROFILE", str(tmp_path))
     import importlib
+
     import local_counters
+
     importlib.reload(local_counters)
 
     with patch("urllib.request.urlopen", side_effect=RuntimeError("net down")):
@@ -71,11 +79,15 @@ def test_no_network_calls(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> No
     assert _counters_path(tmp_path).exists()
 
 
-def test_concurrent_increments_no_data_loss(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+def test_concurrent_increments_no_data_loss(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
     monkeypatch.setenv("HOME", str(tmp_path))
     monkeypatch.setenv("USERPROFILE", str(tmp_path))
     import importlib
+
     import local_counters
+
     importlib.reload(local_counters)
 
     def _worker(idx: int) -> None:
@@ -97,18 +109,24 @@ def test_disabled_when_env_off(tmp_path: Path, monkeypatch: pytest.MonkeyPatch)
     monkeypatch.setenv("USERPROFILE", str(tmp_path))
     monkeypatch.setenv("BICAMERAL_LOCAL_COUNTERS", "0")
     import importlib
+
     import local_counters
+
     importlib.reload(local_counters)
 
     local_counters.increment("bicameral-ingest")
     assert not _counters_path(tmp_path).exists()
 
 
-def test_read_counters_handles_missing_file(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+def test_read_counters_handles_missing_file(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
     monkeypatch.setenv("HOME", str(tmp_path))
     monkeypatch.setenv("USERPROFILE", str(tmp_path))
     import importlib
+
     import local_counters
+
     importlib.reload(local_counters)
 
     assert local_counters.read_counters() == {}
diff --git a/tests/test_phase1_code_locator.py b/tests/test_phase1_code_locator.py
index e5b7e7fd..860ad283 100644
--- a/tests/test_phase1_code_locator.py
+++ b/tests/test_phase1_code_locator.py
@@ -16,7 +16,6 @@
 
 from adapters.code_locator import get_code_locator
 
-
 # ── Real adapter tests (Phase 1 — require indexed repo) ─────────────
 
 
@@ -71,6 +70,7 @@ def test_get_neighbors_returns_valid_edges(monkeypatch, repo_path):
 
 # ── extract_symbols ──────────────────────────────────────────────────
 
+
 @pytest.mark.phase1
 @pytest.mark.asyncio
 async def test_extract_symbols_from_known_file(monkeypatch, repo_path):
diff --git a/tests/test_phase1_l1_wiring.py b/tests/test_phase1_l1_wiring.py
index bf15afb8..08072904 100644
--- a/tests/test_phase1_l1_wiring.py
+++ b/tests/test_phase1_l1_wiring.py
@@ -31,7 +31,6 @@
 from handlers.decision_status import handle_decision_status
 from handlers.link_commit import handle_link_commit
 
-
 # ── Tiny git repo fixture ─────────────────────────────────────────────
 
 
@@ -178,12 +177,10 @@ async def test_ingest_of_existing_symbol_is_pending_until_verified(_isolated_led
     ctx = _ctx()
     status = await handle_decision_status(ctx, filter="all")
     assert status.summary.get("reflected", 0) == 0, (
-        f"v3 must not auto-promote to REFLECTED without a verdict, "
-        f"got summary={status.summary!r}"
+        f"v3 must not auto-promote to REFLECTED without a verdict, got summary={status.summary!r}"
     )
     assert status.summary.get("pending", 0) == 1, (
-        f"Expected 1 pending intent (grounded but unverified), "
-        f"got summary={status.summary!r}"
+        f"Expected 1 pending intent (grounded but unverified), got summary={status.summary!r}"
     )
 
 
@@ -221,8 +218,7 @@ async def test_hash_change_alone_does_not_flip_status_without_verdict(_isolated_
     ctx = _ctx()
     pre = await handle_decision_status(ctx, filter="all")
     assert pre.summary.get("pending", 0) == 1, (
-        f"Pre-edit baseline is PENDING under v3 (grounded, unverified), "
-        f"got summary={pre.summary!r}"
+        f"Pre-edit baseline is PENDING under v3 (grounded, unverified), got summary={pre.summary!r}"
     )
 
     # Invert the discount threshold — real semantic change, not cosmetic
@@ -335,14 +331,11 @@ async def test_backfill_restores_hash_but_stays_pending_without_verdict(_isolate
         f"got summary={status.summary!r}"
     )
     assert status.summary.get("pending", 0) == 1, (
-        f"Post-backfill region is hashed but unverified → PENDING, "
-        f"got summary={status.summary!r}"
+        f"Post-backfill region is hashed but unverified → PENDING, got summary={status.summary!r}"
     )
 
     # Defensive: confirm backfill actually re-stamped the content_hash
     # (the cache-key is now populated even though the verdict isn't).
     post_rows = await client.query("SELECT content_hash FROM code_region")
     hashes = [r.get("content_hash", "") for r in post_rows]
-    assert any(h for h in hashes), (
-        f"Backfill should have populated content_hash, got {hashes!r}"
-    )
+    assert any(h for h in hashes), f"Backfill should have populated content_hash, got {hashes!r}"
diff --git a/tests/test_phase2_ledger.py b/tests/test_phase2_ledger.py
index ce66a558..7b639e28 100644
--- a/tests/test_phase2_ledger.py
+++ b/tests/test_phase2_ledger.py
@@ -33,6 +33,7 @@ def _ctx():
 
 # ── Adapter availability ──────────────────────────────────────────────
 
+
 @pytest.mark.phase2
 @pytest.mark.asyncio
 async def test_real_ledger_adapter_instantiates(monkeypatch, surreal_url):
@@ -45,6 +46,7 @@ async def test_real_ledger_adapter_instantiates(monkeypatch, surreal_url):
 
 # ── Ingestion idempotency ─────────────────────────────────────────────
 
+
 @pytest.mark.phase2
 @pytest.mark.asyncio
 async def test_ingest_payload_creates_intent_node(monkeypatch, surreal_url, minimal_payload):
@@ -87,6 +89,7 @@ async def test_ingest_is_idempotent(monkeypatch, surreal_url, minimal_payload):
 
 # ── BM25 search ───────────────────────────────────────────────────────
 
+
 @pytest.mark.phase2
 @pytest.mark.asyncio
 async def test_bm25_search_finds_ingested_intent(monkeypatch, surreal_url):
@@ -99,24 +102,47 @@ async def test_bm25_search_finds_ingested_intent(monkeypatch, surreal_url):
         await ledger.connect()
 
     desc = "exponential backoff retry on webhook failure"
-    await ledger.ingest_payload({
-        "query": desc, "repo": "test-repo", "commit_hash": "bm25test",
-        "analyzed_at": "2026-03-27T12:00:00Z",
-        "mappings": [{
-            "span": {"span_id": "bm25-0", "source_type": "transcript", "text": desc, "speaker": "", "source_ref": ""},
-            "intent": desc, "symbols": ["WebhookDispatcher.send"],
-            "code_regions": [{"file_path": "webhooks/dispatcher.py", "symbol": "WebhookDispatcher.send",
-                              "type": "function", "start_line": 134, "end_line": 180, "purpose": "dispatch"}],
-            "dependency_edges": [],
-        }],
-    })
-
-    results = await ledger.search_by_query("retry webhook backoff", max_results=10, min_confidence=0.1)
+    await ledger.ingest_payload(
+        {
+            "query": desc,
+            "repo": "test-repo",
+            "commit_hash": "bm25test",
+            "analyzed_at": "2026-03-27T12:00:00Z",
+            "mappings": [
+                {
+                    "span": {
+                        "span_id": "bm25-0",
+                        "source_type": "transcript",
+                        "text": desc,
+                        "speaker": "",
+                        "source_ref": "",
+                    },
+                    "intent": desc,
+                    "symbols": ["WebhookDispatcher.send"],
+                    "code_regions": [
+                        {
+                            "file_path": "webhooks/dispatcher.py",
+                            "symbol": "WebhookDispatcher.send",
+                            "type": "function",
+                            "start_line": 134,
+                            "end_line": 180,
+                            "purpose": "dispatch",
+                        }
+                    ],
+                    "dependency_edges": [],
+                }
+            ],
+        }
+    )
+
+    results = await ledger.search_by_query(
+        "retry webhook backoff", max_results=10, min_confidence=0.1
+    )
     assert len(results) > 0, "BM25 returned no results for recently ingested intent"
     descs = [r["description"] for r in results]
-    assert any("webhook" in d.lower() or "retry" in d.lower() or "backoff" in d.lower() for d in descs), (
-        f"Relevant intent not surfaced by BM25. Got: {descs}"
-    )
+    assert any(
+        "webhook" in d.lower() or "retry" in d.lower() or "backoff" in d.lower() for d in descs
+    ), f"Relevant intent not surfaced by BM25. Got: {descs}"
 
 
 @pytest.mark.phase2
@@ -136,6 +162,7 @@ async def test_bm25_min_confidence_filters_results(monkeypatch, surreal_url):
 
 # ── Reverse traversal: file → decisions ──────────────────────────────
 
+
 @pytest.mark.phase2
 @pytest.mark.asyncio
 async def test_file_reverse_traversal_finds_decision(monkeypatch, surreal_url):
@@ -149,17 +176,38 @@ async def test_file_reverse_traversal_finds_decision(monkeypatch, surreal_url):
 
     file_path = "payments/processor.py"
     desc = "optimistic locking for cart updates"
-    await ledger.ingest_payload({
-        "query": desc, "repo": "test-repo", "commit_hash": "reversetest",
-        "analyzed_at": "2026-03-27T12:00:00Z",
-        "mappings": [{
-            "span": {"span_id": "rev-0", "source_type": "transcript", "text": desc, "speaker": "", "source_ref": ""},
-            "intent": desc, "symbols": ["CartService.updateItem"],
-            "code_regions": [{"file_path": file_path, "symbol": "CartService.updateItem",
-                              "type": "function", "start_line": 87, "end_line": 120, "purpose": "cart update"}],
-            "dependency_edges": [],
-        }],
-    })
+    await ledger.ingest_payload(
+        {
+            "query": desc,
+            "repo": "test-repo",
+            "commit_hash": "reversetest",
+            "analyzed_at": "2026-03-27T12:00:00Z",
+            "mappings": [
+                {
+                    "span": {
+                        "span_id": "rev-0",
+                        "source_type": "transcript",
+                        "text": desc,
+                        "speaker": "",
+                        "source_ref": "",
+                    },
+                    "intent": desc,
+                    "symbols": ["CartService.updateItem"],
+                    "code_regions": [
+                        {
+                            "file_path": file_path,
+                            "symbol": "CartService.updateItem",
+                            "type": "function",
+                            "start_line": 87,
+                            "end_line": 120,
+                            "purpose": "cart update",
+                        }
+                    ],
+                    "dependency_edges": [],
+                }
+            ],
+        }
+    )
 
     decisions = await ledger.get_decisions_for_file(file_path)
     assert len(decisions) > 0, f"No decisions found for {file_path!r} via reverse traversal"
@@ -182,6 +230,7 @@ async def test_unknown_file_returns_empty(monkeypatch, surreal_url):
 
 # ── link_commit idempotency ───────────────────────────────────────────
 
+
 @pytest.mark.phase2
 @pytest.mark.asyncio
 async def test_link_commit_idempotent(monkeypatch, surreal_url):
@@ -223,6 +272,7 @@ async def test_link_commit_updates_sync_cursor(monkeypatch, surreal_url):
 
 # ── decision_status via real graph ────────────────────────────────────
 
+
 @pytest.mark.phase2
 @pytest.mark.asyncio
 async def test_decision_status_reflects_ingested_data(monkeypatch, surreal_url, minimal_payload):
@@ -262,27 +312,41 @@ async def test_ungrounded_intent_has_correct_status(monkeypatch, surreal_url):
         await ledger.connect()
 
     desc = "zzqx qqzzyy nonsensetoken glarbflumph deliberate-gibberish wlrdpfnz"
-    await ledger.ingest_payload({
-        "query": desc, "repo": "test-repo", "commit_hash": "unground01",
-        "analyzed_at": "2026-03-27T12:00:00Z",
-        "mappings": [{
-            "span": {"span_id": "ug-0", "source_type": "transcript", "text": desc, "speaker": "", "source_ref": ""},
-            "intent": desc, "symbols": [], "code_regions": [], "dependency_edges": [],
-        }],
-    })
+    await ledger.ingest_payload(
+        {
+            "query": desc,
+            "repo": "test-repo",
+            "commit_hash": "unground01",
+            "analyzed_at": "2026-03-27T12:00:00Z",
+            "mappings": [
+                {
+                    "span": {
+                        "span_id": "ug-0",
+                        "source_type": "transcript",
+                        "text": desc,
+                        "speaker": "",
+                        "source_ref": "",
+                    },
+                    "intent": desc,
+                    "symbols": [],
+                    "code_regions": [],
+                    "dependency_edges": [],
+                }
+            ],
+        }
+    )
 
     # Query the ledger directly — handle_decision_status auto-syncs via
     # link_commit which triggers _reground_ungrounded, potentially changing
     # the status before we can assert on it.
     ungrounded = await ledger.get_all_decisions(filter="ungrounded")
     descs = [d.get("description", "") for d in ungrounded]
-    assert any(desc in d for d in descs), (
-        f"Expected {desc!r} in ungrounded filter. Got: {descs}"
-    )
+    assert any(desc in d for d in descs), f"Expected {desc!r} in ungrounded filter. Got: {descs}"
 
 
 # ── detect_drift with real reverse traversal ──────────────────────────
 
+
 @pytest.mark.phase2
 @pytest.mark.asyncio
 async def test_detect_drift_returns_decisions_for_ingested_file(monkeypatch, surreal_url):
@@ -296,17 +360,38 @@ async def test_detect_drift_returns_decisions_for_ingested_file(monkeypatch, sur
 
     file_path = "services/checkout.py"
     desc = "rate limit checkout endpoint"
-    await ledger.ingest_payload({
-        "query": desc, "repo": "test-repo", "commit_hash": "drift001",
-        "analyzed_at": "2026-03-27T12:00:00Z",
-        "mappings": [{
-            "span": {"span_id": "d-0", "source_type": "transcript", "text": desc, "speaker": "", "source_ref": "mtg-001"},
-            "intent": desc, "symbols": ["CheckoutService.process"],
-            "code_regions": [{"file_path": file_path, "symbol": "CheckoutService.process",
-                              "type": "function", "start_line": 45, "end_line": 90, "purpose": "checkout"}],
-            "dependency_edges": [],
-        }],
-    })
+    await ledger.ingest_payload(
+        {
+            "query": desc,
+            "repo": "test-repo",
+            "commit_hash": "drift001",
+            "analyzed_at": "2026-03-27T12:00:00Z",
+            "mappings": [
+                {
+                    "span": {
+                        "span_id": "d-0",
+                        "source_type": "transcript",
+                        "text": desc,
+                        "speaker": "",
+                        "source_ref": "mtg-001",
+                    },
+                    "intent": desc,
+                    "symbols": ["CheckoutService.process"],
+                    "code_regions": [
+                        {
+                            "file_path": file_path,
+                            "symbol": "CheckoutService.process",
+                            "type": "function",
+                            "start_line": 45,
+                            "end_line": 90,
+                            "purpose": "checkout",
+                        }
+                    ],
+                    "dependency_edges": [],
+                }
+            ],
+        }
+    )
 
     ctx = _ctx()
     result = await handle_detect_drift(ctx, file_path)
@@ -326,7 +411,9 @@ async def test_source_cursor_upserts_after_ingest(monkeypatch, surreal_url, mini
     from handlers.ingest import handle_ingest
 
     ctx = _ctx()
-    result = await handle_ingest(ctx, minimal_payload, source_scope="slack:C123", cursor="1743210021.123")
+    result = await handle_ingest(
+        ctx, minimal_payload, source_scope="slack:C123", cursor="1743210021.123"
+    )
 
     assert result.source_cursor is not None
     assert result.source_cursor.repo == "test-repo"
@@ -338,6 +425,7 @@ async def test_source_cursor_upserts_after_ingest(monkeypatch, surreal_url, mini
 
 # ── M1 decision-relevance instrumentation ────────────────────────────
 
+
 @pytest.mark.phase2
 @pytest.mark.asyncio
 async def test_ingest_stats_populates_grounded_fields(
@@ -346,6 +434,7 @@ async def test_ingest_stats_populates_grounded_fields(
     """handle_ingest must populate stats.grounded + stats.grounded_pct and
     emit a [ingest] complete log line. This is the M1 instrumentation gate."""
     import logging
+
     monkeypatch.setenv("USE_REAL_LEDGER", "1")
     monkeypatch.setenv("SURREAL_URL", surreal_url)
 
diff --git a/tests/test_phase3_integration.py b/tests/test_phase3_integration.py
index 253404db..c3a363b0 100644
--- a/tests/test_phase3_integration.py
+++ b/tests/test_phase3_integration.py
@@ -107,6 +107,7 @@ def _response_dict(response) -> dict:
 
 # ── Real code locator helpers ────────────────────────────────────────
 
+
 def _locate_hits(adapter, query_str: str, limit: int = 2) -> list[dict]:
     """Resolve a bag-of-words query to {file_path, symbol_name, line_number}
     hits for test payload construction.
@@ -134,12 +135,14 @@ def _locate_hits(adapter, query_str: str, limit: int = 2) -> list[dict]:
         row = db.lookup_by_id(sid)
         if row is None:
             continue
-        hits.append({
-            "file_path": row["file_path"],
-            "symbol_name": row["name"],
-            "line_number": row["start_line"],
-            "score": v.get("match_score", 0) / 100.0,
-        })
+        hits.append(
+            {
+                "file_path": row["file_path"],
+                "symbol_name": row["name"],
+                "line_number": row["start_line"],
+                "score": v.get("match_score", 0) / 100.0,
+            }
+        )
         if len(hits) >= limit:
             break
     return hits
@@ -175,30 +178,34 @@ def _build_payload_from_real_code(
                 sym = hit.get("symbol_name", "")
                 line = hit.get("line_number", 1)
                 if fp:
-                    code_regions.append({
-                        "file_path": fp,
-                        "symbol": sym or fp.split("/")[-1],
-                        "type": "function",
-                        "start_line": line,
-                        "end_line": line + 20,
-                        "purpose": f"Located from search terms: {item['search']!r}",
-                    })
+                    code_regions.append(
+                        {
+                            "file_path": fp,
+                            "symbol": sym or fp.split("/")[-1],
+                            "type": "function",
+                            "start_line": line,
+                            "end_line": line + 20,
+                            "purpose": f"Located from search terms: {item['search']!r}",
+                        }
+                    )
                     if sym:
                         symbols.append(sym)
 
-        mappings.append({
-            "span": {
-                "span_id": f"e2e-{i}",
-                "source_type": source_type,
-                "text": item["text"],
-                "speaker": item.get("speaker", ""),
-                "source_ref": source_ref,
-            },
-            "intent": item["intent"],
-            "symbols": symbols,
-            "code_regions": code_regions,
-            "dependency_edges": [],
-        })
+        mappings.append(
+            {
+                "span": {
+                    "span_id": f"e2e-{i}",
+                    "source_type": source_type,
+                    "text": item["text"],
+                    "speaker": item.get("speaker", ""),
+                    "source_ref": source_ref,
+                },
+                "intent": item["intent"],
+                "symbols": symbols,
+                "code_regions": code_regions,
+                "dependency_edges": [],
+            }
+        )
 
     return {
         "query": query,
@@ -215,6 +222,7 @@ def _build_payload_from_real_code(
 # Tool: bicameral.search — pre-flight before coding
 # ══════════════════════════════════════════════════════════════════════
 
+
 @pytest.mark.phase3
 @pytest.mark.asyncio
 async def test_constraint_lost__search_surfaces_prior_decisions(ctx):
@@ -278,6 +286,7 @@ async def test_constraint_lost__search_surfaces_prior_decisions(ctx):
 # Tool: bicameral.ingest — normalizes intent from multiple sources
 # ══════════════════════════════════════════════════════════════════════
 
+
 @pytest.mark.phase3
 @pytest.mark.asyncio
 async def test_context_scattered__ingest_unifies_sources(ctx):
@@ -355,6 +364,7 @@ async def test_context_scattered__ingest_unifies_sources(ctx):
 # Tool: bicameral.status — tracks decided vs built, surfaces ungrounded
 # ══════════════════════════════════════════════════════════════════════
 
+
 @pytest.mark.phase3
 @pytest.mark.asyncio
 async def test_decision_undocumented__status_surfaces_ungrounded(ctx):
@@ -414,6 +424,7 @@ async def test_decision_undocumented__status_surfaces_ungrounded(ctx):
 # Tool: search + code locator — retrieves full decision provenance
 # ══════════════════════════════════════════════════════════════════════
 
+
 @pytest.mark.phase3
 @pytest.mark.asyncio
 async def test_repeated_explanation__search_returns_full_provenance(ctx):
@@ -471,6 +482,7 @@ async def test_repeated_explanation__search_returns_full_provenance(ctx):
 # Tool: bicameral.drift — surfaces institutional memory tied to code
 # ══════════════════════════════════════════════════════════════════════
 
+
 @pytest.mark.phase3
 @pytest.mark.asyncio
 async def test_tribal_knowledge__drift_surfaces_decisions_for_file(ctx):
@@ -522,6 +534,7 @@ async def test_tribal_knowledge__drift_surfaces_decisions_for_file(ctx):
 # INTEGRATION: Full lifecycle + graph integrity
 # ══════════════════════════════════════════════════════════════════════
 
+
 @pytest.mark.phase3
 @pytest.mark.asyncio
 async def test_full_lifecycle_graph_integrity(ctx):
@@ -576,7 +589,9 @@ async def test_full_lifecycle_graph_integrity(ctx):
     _dump("06_lifecycle_03_status", _response_dict(r_status))
 
     # Step 4: Search
-    r_search = await handle_search_decisions(ctx, query="BM25 search provenance", min_confidence=0.1)
+    r_search = await handle_search_decisions(
+        ctx, query="BM25 search provenance", min_confidence=0.1
+    )
     assert len(r_search.matches) >= 1
     _dump("06_lifecycle_04_search", _response_dict(r_search))
 
diff --git a/tests/test_pollution_bug.py b/tests/test_pollution_bug.py
index 94e3102b..b60ddb12 100644
--- a/tests/test_pollution_bug.py
+++ b/tests/test_pollution_bug.py
@@ -30,13 +30,16 @@
 from handlers.ingest import handle_ingest
 from handlers.link_commit import handle_link_commit
 
-
 # ── Tiny git repo fixture with main + feature branch ─────────────────
 
 
 def _git(cwd: Path, *args: str, check: bool = True) -> str:
     result = subprocess.run(
-        ["git", *args], cwd=cwd, capture_output=True, text=True, check=check,
+        ["git", *args],
+        cwd=cwd,
+        capture_output=True,
+        text=True,
+        check=check,
     )
     return result.stdout.strip()
 
@@ -133,7 +136,9 @@ def _payload(repo: Path) -> dict:
 @pytest.mark.phase2
 @pytest.mark.asyncio
 async def test_ingest_on_branch_stamps_main_baseline(
-    monkeypatch, branched_repo, surreal_url,
+    monkeypatch,
+    branched_repo,
+    surreal_url,
 ):
     """Bug 3 (F1a) — ``handle_ingest`` from a feature branch must stamp
     baseline hashes against the authoritative ref (main), not the branch.
@@ -168,20 +173,27 @@ async def test_ingest_on_branch_stamps_main_baseline(
     # Query the ledger directly for the stamped content_hash
     ledger = get_ledger()
     client = ledger._client
-    rows = await client.query(
-        "SELECT content_hash FROM code_region WHERE file_path = 'pricing.py'"
-    )
+    rows = await client.query("SELECT content_hash FROM code_region WHERE file_path = 'pricing.py'")
     assert len(rows) >= 1, "code_region not created"
     stamped_hash = rows[0].get("content_hash", "")
     assert stamped_hash, "content_hash is empty — pollution guard failed upstream"
 
     # Compute what main's content hash SHOULD be
     from ledger.status import compute_content_hash
+
     main_hash = compute_content_hash(
-        "pricing.py", 1, 4, str(branched_repo), ref=ctx.authoritative_sha,
+        "pricing.py",
+        1,
+        4,
+        str(branched_repo),
+        ref=ctx.authoritative_sha,
     )
     branch_hash = compute_content_hash(
-        "pricing.py", 1, 4, str(branched_repo), ref="HEAD",
+        "pricing.py",
+        1,
+        4,
+        str(branched_repo),
+        ref="HEAD",
     )
 
     assert main_hash != branch_hash, "test setup broken: branch and main have the same hash"
@@ -196,7 +208,9 @@ async def test_ingest_on_branch_stamps_main_baseline(
 @pytest.mark.phase2
 @pytest.mark.asyncio
 async def test_link_commit_on_branch_runs_read_only(
-    monkeypatch, branched_repo, surreal_url,
+    monkeypatch,
+    branched_repo,
+    surreal_url,
 ):
     """Bug 1 (F1) — ``handle_link_commit`` on a branch must not update
     stored baseline hashes. Drift is computed for reporting, but the
diff --git a/tests/test_post_commit_sync_hook.py b/tests/test_post_commit_sync_hook.py
new file mode 100644
index 00000000..bd96d44f
--- /dev/null
+++ b/tests/test_post_commit_sync_hook.py
@@ -0,0 +1,135 @@
+"""Functionality tests for scripts/hooks/post_commit_sync_reminder.py.
+
+The hook is invoked as a subprocess by Claude Code on every PostToolUse
+matching ``Bash``. Tests run it the same way to exercise stdin/stdout
+exactly as production does.
+
+Claude Code 2.x requires PostToolUse hook output shaped as
+``{"hookSpecificOutput": {"hookEventName": "PostToolUse",
+"additionalContext": "..."}}``. Plain stdout from PostToolUse hooks is
+silently dropped to the debug log (per
+https://code.claude.com/docs/en/hooks). These tests assert against the
+envelope shape — anything else is a broken contract.
+"""
+
+from __future__ import annotations
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+HOOK_SCRIPT = REPO_ROOT / "scripts" / "hooks" / "post_commit_sync_reminder.py"
+
+
+def _run_hook(stdin_text: str) -> tuple[int, str, str]:
+    proc = subprocess.run(
+        [sys.executable, str(HOOK_SCRIPT)],
+        input=stdin_text,
+        capture_output=True,
+        text=True,
+        timeout=10,
+    )
+    return proc.returncode, proc.stdout, proc.stderr
+
+
+def _make_stdin(*, tool_name: str = "Bash", command: str = "") -> str:
+    return json.dumps({"tool_name": tool_name, "tool_input": {"command": command}})
+
+
+def _hook_output(parsed: dict) -> dict:
+    """Return the hookSpecificOutput payload, asserting the envelope shape."""
+    assert "hookSpecificOutput" in parsed, (
+        f"hook must emit hookSpecificOutput envelope (Claude Code 2.x contract); got {parsed!r}"
+    )
+    inner = parsed["hookSpecificOutput"]
+    assert inner.get("hookEventName") == "PostToolUse"
+    return inner
+
+
+def _assert_silent(out: str) -> None:
+    """No envelope written. Tolerate fully-empty stdout or `{}`."""
+    if not out.strip():
+        return
+    parsed = json.loads(out)
+    assert "hookSpecificOutput" not in parsed
+
+
+def test_emits_reminder_on_git_commit():
+    rc, out, _ = _run_hook(_make_stdin(command="git commit -m 'feat: add foo'"))
+    assert rc == 0
+    inner = _hook_output(json.loads(out))
+    ctx = inner["additionalContext"]
+    assert "bicameral: new commit detected" in ctx
+    assert "/bicameral:sync" in ctx
+
+
+def test_emits_reminder_on_git_merge():
+    rc, out, _ = _run_hook(_make_stdin(command="git merge feature/foo --no-ff"))
+    assert rc == 0
+    inner = _hook_output(json.loads(out))
+    assert "bicameral: new commit detected" in inner["additionalContext"]
+
+
+def test_emits_reminder_on_git_pull():
+    rc, out, _ = _run_hook(_make_stdin(command="git pull origin main"))
+    assert rc == 0
+    inner = _hook_output(json.loads(out))
+    assert "bicameral: new commit detected" in inner["additionalContext"]
+
+
+def test_emits_reminder_on_git_rebase_continue():
+    rc, out, _ = _run_hook(_make_stdin(command="git rebase --continue"))
+    assert rc == 0
+    inner = _hook_output(json.loads(out))
+    assert "bicameral: new commit detected" in inner["additionalContext"]
+
+
+def test_silent_on_read_only_git_command():
+    """git status, git log, git diff, etc. → silent."""
+    for cmd in ["git status", "git log -10", "git diff HEAD", "git branch -a"]:
+        rc, out, _ = _run_hook(_make_stdin(command=cmd))
+        assert rc == 0
+        _assert_silent(out)
+
+
+def test_silent_on_non_bash_tool():
+    """Hook only fires for Bash; other tools → silent."""
+    rc, out, _ = _run_hook(_make_stdin(tool_name="Edit", command="git commit"))
+    assert rc == 0
+    _assert_silent(out)
+
+
+def test_silent_on_non_git_bash_command():
+    rc, out, _ = _run_hook(_make_stdin(command="ls -la"))
+    assert rc == 0
+    _assert_silent(out)
+
+
+def test_handles_malformed_stdin():
+    rc, out, _ = _run_hook("this is not JSON at all {[}")
+    assert rc == 0
+    _assert_silent(out)
+
+
+def test_handles_missing_tool_input():
+    payload = json.dumps({"tool_name": "Bash"})
+    rc, out, _ = _run_hook(payload)
+    assert rc == 0
+    _assert_silent(out)
+
+
+def test_handles_non_dict_tool_input():
+    payload = json.dumps({"tool_name": "Bash", "tool_input": "git commit"})
+    rc, out, _ = _run_hook(payload)
+    assert rc == 0
+    _assert_silent(out)
+
+
+def test_idempotent_on_double_fire():
+    stdin = _make_stdin(command="git commit -m 'whatever'")
+    rc1, out1, _ = _run_hook(stdin)
+    rc2, out2, _ = _run_hook(stdin)
+    assert rc1 == rc2 == 0
+    assert out1 == out2
diff --git a/tests/test_post_preflight_capture_hook.py b/tests/test_post_preflight_capture_hook.py
new file mode 100644
index 00000000..58395653
--- /dev/null
+++ b/tests/test_post_preflight_capture_hook.py
@@ -0,0 +1,197 @@
+"""Functionality tests for scripts/hooks/post_preflight_capture_reminder.py.
+
+The hook is invoked as a subprocess by Claude Code on every PostToolUse
+matching ``mcp__bicameral__bicameral_preflight``. Tests run it the same
+way to exercise stdin/stdout exactly as production does.
+
+Claude Code 2.x requires PostToolUse hook output shaped as
+``{"hookSpecificOutput": {"hookEventName": "PostToolUse",
+"additionalContext": "..."}}``. Plain stdout from PostToolUse hooks is
+silently dropped to the debug log (per
+https://code.claude.com/docs/en/hooks — only UserPromptSubmit /
+UserPromptExpansion / SessionStart treat raw stdout as agent-visible
+context). These tests assert against the envelope shape — anything else
+is a broken contract regardless of whether the hook process exits
+cleanly.
+"""
+
+from __future__ import annotations
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+HOOK_SCRIPT = REPO_ROOT / "scripts" / "hooks" / "post_preflight_capture_reminder.py"
+
+PREFLIGHT_TOOL_NAME = "mcp__bicameral__bicameral_preflight"
+
+
+def _run_hook(stdin_text: str) -> tuple[int, str, str]:
+    """Invoke the hook with stdin_text on stdin; return (rc, stdout, stderr)."""
+    proc = subprocess.run(
+        [sys.executable, str(HOOK_SCRIPT)],
+        input=stdin_text,
+        capture_output=True,
+        text=True,
+        timeout=10,
+    )
+    return proc.returncode, proc.stdout, proc.stderr
+
+
+def _make_stdin(*, fired: bool, decisions: list[dict], response_as_string: bool = False) -> str:
+    response = {"fired": fired, "decisions": decisions}
+    payload = {
+        "tool_name": PREFLIGHT_TOOL_NAME,
+        "tool_input": {"topic": "reorder commits", "file_paths": ["app/src/lib/git/reorder.ts"]},
+        "tool_response": json.dumps(response) if response_as_string else response,
+    }
+    return json.dumps(payload)
+
+
+def _hook_output(parsed: dict) -> dict:
+    """Return the hookSpecificOutput payload, asserting the envelope shape."""
+    assert "hookSpecificOutput" in parsed, (
+        f"hook must emit hookSpecificOutput envelope (Claude Code 2.x contract); got {parsed!r}"
+    )
+    inner = parsed["hookSpecificOutput"]
+    assert inner.get("hookEventName") == "PostToolUse"
+    return inner
+
+
+def test_emits_reminder_when_decisions_surfaced():
+    """fired=True with ≥1 decision → envelope with reminder containing each
+    decision_id + the Step 5.6.1 AskUserQuestion disambiguation template
+    (per #175). The reminder no longer templates the bare ingest+
+    resolve_collision sequence — it templates the user-disambiguation
+    question whose answer drives Step 5.6.2's mechanical capture.
+    """
+    stdin = _make_stdin(
+        fired=True,
+        decisions=[
+            {"decision_id": "decision:abc123", "description": "Drag-and-drop to reorder commits"},
+            {"decision_id": "decision:def456", "description": "Cherry-pick across branches"},
+        ],
+    )
+    rc, out, _ = _run_hook(stdin)
+    assert rc == 0
+    inner = _hook_output(json.loads(out))
+    ctx = inner["additionalContext"]
+    assert "<system-reminder>" in ctx
+    # Surfaced decisions are listed verbatim so the agent can scope the
+    # disambiguation question.
+    assert "decision:abc123" in ctx
+    assert "decision:def456" in ctx
+    assert "Drag-and-drop to reorder commits" in ctx
+    # The Step 5.6.1 AskUserQuestion shape is templated.
+    assert "AskUserQuestion" in ctx
+    assert "supersede" in ctx and "keep_both" in ctx
+    assert "unrelated" in ctx
+    # Branch instructions for Step 5.6.2 are still present so the agent
+    # knows what to do with each answer.
+    assert "agent_session" in ctx
+    assert "resolve_collision" in ctx
+
+
+def test_reminder_routes_judgment_to_user_not_agent():
+    """Per #175, the agent must NOT judge contradiction itself — it asks
+    the user via ``AskUserQuestion`` and acts on the answer mechanically.
+    Lock the user-disambiguation posture in so future edits don't quietly
+    regress to ``"you MUST capture"`` (which the agent demonstrably
+    ignored on borderline prompts) or to the original ``"IF you
+    contradict ..."`` conditional gate.
+    """
+    stdin = _make_stdin(
+        fired=True,
+        decisions=[{"decision_id": "decision:abc", "description": "Some prior decision"}],
+    )
+    _, out, _ = _run_hook(stdin)
+    ctx = _hook_output(json.loads(out))["additionalContext"]
+    # Affirmative: judgment moves to the user.
+    assert "do NOT judge contradiction yourself" in ctx
+    assert "ask the user" in ctx
+    assert "AskUserQuestion" in ctx
+    # Negative: must NOT contain the prior unconditional capture wording
+    # (which short-circuited the user-in-the-loop design) NOR the original
+    # conditional escape hatch (which over-deferred to agent judgment).
+    assert "BEFORE any code edits, you MUST capture" not in ctx
+    assert "If your current prompt CONTRADICTS" not in ctx
+    assert "If your prompt is COMPATIBLE" not in ctx
+    assert "ignore this and proceed normally" not in ctx
+
+
+def _assert_silent(out: str) -> None:
+    """No envelope written. Tolerate fully-empty stdout or `{}`."""
+    if not out.strip():
+        return
+    parsed = json.loads(out)
+    assert "hookSpecificOutput" not in parsed
+
+
+def test_silent_when_fired_false():
+    """fired=False → no envelope."""
+    stdin = _make_stdin(fired=False, decisions=[])
+    rc, out, _ = _run_hook(stdin)
+    assert rc == 0
+    _assert_silent(out)
+
+
+def test_silent_when_decisions_empty():
+    """fired=True but decisions=[] → no envelope (nothing to contradict)."""
+    stdin = _make_stdin(fired=True, decisions=[])
+    rc, out, _ = _run_hook(stdin)
+    assert rc == 0
+    _assert_silent(out)
+
+
+def test_handles_response_as_json_string():
+    """tool_response can arrive as a JSON-encoded string; reminder still fires."""
+    stdin = _make_stdin(
+        fired=True,
+        decisions=[{"decision_id": "decision:xyz", "description": "Some constraint"}],
+        response_as_string=True,
+    )
+    rc, out, _ = _run_hook(stdin)
+    assert rc == 0
+    inner = _hook_output(json.loads(out))
+    assert "decision:xyz" in inner["additionalContext"]
+
+
+def test_silent_when_tool_name_does_not_match():
+    """Hook only fires for bicameral_preflight; other tools → silent."""
+    payload = {
+        "tool_name": "Bash",
+        "tool_input": {"command": "git commit"},
+        "tool_response": {"fired": True, "decisions": [{"decision_id": "x", "description": "y"}]},
+    }
+    rc, out, _ = _run_hook(json.dumps(payload))
+    assert rc == 0
+    _assert_silent(out)
+
+
+def test_handles_malformed_stdin():
+    """Non-JSON stdin returns rc 0 with no envelope — never blocks user."""
+    rc, out, _ = _run_hook("this is not JSON at all {[}")
+    assert rc == 0
+    _assert_silent(out)
+
+
+def test_handles_missing_tool_response():
+    """Payload without tool_response → silent (no contradiction signal)."""
+    payload = {"tool_name": PREFLIGHT_TOOL_NAME, "tool_input": {}}
+    rc, out, _ = _run_hook(json.dumps(payload))
+    assert rc == 0
+    _assert_silent(out)
+
+
+def test_idempotent_on_double_fire():
+    """Same input twice produces identical output (no state leak)."""
+    stdin = _make_stdin(
+        fired=True,
+        decisions=[{"decision_id": "decision:abc", "description": "Some decision"}],
+    )
+    rc1, out1, _ = _run_hook(stdin)
+    rc2, out2, _ = _run_hook(stdin)
+    assert rc1 == rc2 == 0
+    assert out1 == out2
diff --git a/tests/test_preflight_graph_expansion.py b/tests/test_preflight_graph_expansion.py
new file mode 100644
index 00000000..1e5d3fb3
--- /dev/null
+++ b/tests/test_preflight_graph_expansion.py
@@ -0,0 +1,434 @@
+"""Tests for the 1-hop code-graph expansion in region-anchored preflight (#173).
+
+Two layers:
+
+1. **Adapter unit** — ``RealCodeLocatorAdapter.expand_file_paths_via_graph``
+   returns the union of input file paths plus 1-hop neighbor files,
+   bounded by ``max_neighbors_per_result``. Exercised against an
+   in-memory ``SymbolDB`` with hand-inserted symbols and edges so the
+   test doesn't depend on a real codebase index.
+
+2. **Handler integration** — ``_region_anchored_preflight`` in
+   ``handlers/preflight.py`` calls the expander, surfaces decisions
+   bound to expanded paths with ``confidence=0.7``, and tags
+   ``"graph"`` on ``sources_chained``. The structural
+   distance scenario: a decision is bound to ``app/src/lib/git/reorder.ts``;
+   the caller passes ``["app/src/ui/multi-commit-operation/reorder.tsx"]``
+   (a graph neighbor); the decision still surfaces.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from adapters.code_locator import RealCodeLocatorAdapter
+from adapters.ledger import reset_ledger_singleton
+from code_locator.config import CodeLocatorConfig
+from code_locator.indexing.sqlite_store import SymbolDB, SymbolRecord
+from context import BicameralContext
+from handlers.bind import handle_bind
+from handlers.ingest import handle_ingest
+from handlers.preflight import handle_preflight
+
+
+def _build_ingest_payload(description: str) -> dict:
+    """Internal-format ingest payload that produces a single ratified mapping.
+
+    Mirrors the shape used by ``test_alpha_contract::_ingest_payload`` with
+    ``with_region=False`` + ``signoff=True`` so the test ingest produces an
+    ungrounded decision ready to bind in the next step.
+    """
+    return {
+        "query": description,
+        "repo": "graph-expand-test-repo",
+        "mappings": [
+            {
+                "intent": description,
+                "span": {
+                    "source_type": "transcript",
+                    "text": description,
+                    "source_ref": "graph-expand-test",
+                    "speakers": ["test@example.com"],
+                    "meeting_date": "2026-05-04",
+                },
+                "symbols": [],
+                "code_regions": [],
+                "signoff": {
+                    "state": "ratified",
+                    "signer": "test@example.com",
+                    "ratified_at": "2026-05-04T00:00:00Z",
+                    "session_id": None,
+                },
+            }
+        ],
+    }
+
+
+def _stub_adapter_with(db: SymbolDB, max_neighbors: int = 10) -> RealCodeLocatorAdapter:
+    """Build a RealCodeLocatorAdapter wired to a hand-built SymbolDB.
+
+    Bypasses the ``_ensure_initialized`` index-presence check so we don't
+    have to point at a real codebase. Sets ``_initialized=True`` and
+    populates ``_db`` + ``_config`` directly — the only attributes
+    ``expand_file_paths_via_graph`` reads.
+    """
+    adapter = RealCodeLocatorAdapter(repo_path=".")
+    adapter._db = db
+    adapter._config = CodeLocatorConfig(max_neighbors_per_result=max_neighbors)
+    adapter._initialized = True
+    return adapter
+
+
+def _build_synthetic_db(tmp_path) -> SymbolDB:
+    """Two files, one edge: ``reorder.tsx`` imports a symbol from ``reorder.ts``."""
+    db = SymbolDB(str(tmp_path / "sym.db"))
+    db.init_db()
+    db.insert_symbols_batch(
+        [
+            # symbol id 1 — git-layer (where the decision is bound)
+            SymbolRecord(
+                name="reorder",
+                qualified_name="reorder",
+                type="function",
+                file_path="app/src/lib/git/reorder.ts",
+                start_line=10,
+                end_line=80,
+                signature="export function reorder(...)",
+                parent_qualified_name="",
+            ),
+            # symbol id 2 — UI layer (caller's chosen file)
+            SymbolRecord(
+                name="Reorder",
+                qualified_name="Reorder",
+                type="class",
+                file_path="app/src/ui/multi-commit-operation/reorder.tsx",
+                start_line=4,
+                end_line=27,
+                signature="export class Reorder ...",
+                parent_qualified_name="",
+            ),
+        ]
+    )
+    # The UI symbol (id 2) imports the git-layer symbol (id 1): one directed edge, 2 → 1.
+    db.insert_edges_batch([(2, 1, "imports")])
+    return db
+
+
+# ── Adapter unit tests ──────────────────────────────────────────────────
+
+
+def test_expander_finds_1_hop_neighbor_file(tmp_path):
+    """Passing the UI file alone returns it + the git-layer neighbor."""
+    db = _build_synthetic_db(tmp_path)
+    adapter = _stub_adapter_with(db)
+    expanded, added = adapter.expand_file_paths_via_graph(
+        ["app/src/ui/multi-commit-operation/reorder.tsx"], hops=1
+    )
+    assert "app/src/ui/multi-commit-operation/reorder.tsx" in expanded
+    assert "app/src/lib/git/reorder.ts" in expanded
+    assert added == ["app/src/lib/git/reorder.ts"]
+
+
+def test_expander_preserves_input_paths_when_no_neighbors(tmp_path):
+    """A file with indexed symbols but no edges yields no expansion."""
+    db = SymbolDB(str(tmp_path / "lonely.db"))
+    db.init_db()
+    db.insert_symbols_batch(
+        [
+            SymbolRecord(
+                name="standalone",
+                qualified_name="standalone",
+                type="function",
+                file_path="app/src/lonely.ts",
+                start_line=1,
+                end_line=10,
+                signature="",
+                parent_qualified_name="",
+            )
+        ]
+    )
+    adapter = _stub_adapter_with(db)
+    expanded, added = adapter.expand_file_paths_via_graph(["app/src/lonely.ts"], hops=1)
+    assert expanded == ["app/src/lonely.ts"]
+    assert added == []
+
+
+def test_expander_handles_empty_input():
+    db = SymbolDB(":memory:")
+    db.init_db()
+    adapter = _stub_adapter_with(db)
+    expanded, added = adapter.expand_file_paths_via_graph([], hops=1)
+    assert expanded == []
+    assert added == []
+
+
+def test_expander_handles_unindexed_file(tmp_path):
+    """A file with NO symbols in the index contributes nothing — no crash."""
+    db = _build_synthetic_db(tmp_path)
+    adapter = _stub_adapter_with(db)
+    expanded, added = adapter.expand_file_paths_via_graph(["app/src/never-indexed.ts"], hops=1)
+    assert expanded == ["app/src/never-indexed.ts"]
+    assert added == []
+
+
+def test_expander_caps_hub_file_explosion(tmp_path):
+    """A hub file with many neighbors does not blow up the result set.
+
+    Per-symbol cap = ``max_neighbors_per_result``; global cap scales with
+    input size. With one input file and ``max_neighbors=2``, expansion
+    should add at most 2 paths.
+    """
+    db = SymbolDB(str(tmp_path / "hub.db"))
+    db.init_db()
+    # 1 hub symbol + 5 neighbor symbols, each in a different file.
+    records = [
+        SymbolRecord("hub", "hub", "function", "hub.ts", 1, 5, "", ""),
+    ]
+    for i in range(5):
+        records.append(
+            SymbolRecord(
+                f"neigh_{i}",
+                f"neigh_{i}",
+                "function",
+                f"neigh_{i}.ts",
+                1,
+                3,
+                "",
+                "",
+            )
+        )
+    db.insert_symbols_batch(records)
+    # Hub imports each of the 5 neighbors. (Use ``imports`` not ``invokes``
+    # because the expander now filters to import edges only — see
+    # ``test_expander_filters_to_imports_only`` and #64.)
+    db.insert_edges_batch([(1, i + 2, "imports") for i in range(5)])
+
+    adapter = _stub_adapter_with(db, max_neighbors=2)
+    expanded, added = adapter.expand_file_paths_via_graph(["hub.ts"], hops=1)
+    # The per-symbol cap limits each symbol's neighbor walk to 2, so even though
+    # 5 neighbors exist, expansion adds at most 2.
+    assert len(added) <= 2
+    assert len(added) > 0, "imports-edges hub should produce some expansion"
+    assert "hub.ts" in expanded
+
+
+def test_expander_filters_to_imports_only(tmp_path):
+    """Per #64: only ``imports`` edges expand; ``invokes`` / ``inherits`` /
+    ``contains`` are symbol-level edges that over-broaden the file-level
+    expansion. A neighbor reachable only via a non-import edge must NOT
+    appear in the expanded set.
+    """
+    db = SymbolDB(str(tmp_path / "edge_filter.db"))
+    db.init_db()
+    db.insert_symbols_batch(
+        [
+            SymbolRecord("caller", "caller", "function", "caller.ts", 1, 5, "", ""),
+            SymbolRecord("import_target", "import_target", "function", "imp.ts", 1, 5, "", ""),
+            SymbolRecord("invoke_target", "invoke_target", "function", "inv.ts", 1, 5, "", ""),
+            SymbolRecord("inherit_target", "inherit_target", "class", "inh.ts", 1, 5, "", ""),
+        ]
+    )
+    db.insert_edges_batch(
+        [
+            (1, 2, "imports"),  # caller → imp.ts (should expand)
+            (1, 3, "invokes"),  # caller → inv.ts (should NOT expand)
+            (1, 4, "inherits"),  # caller → inh.ts (should NOT expand)
+        ]
+    )
+    adapter = _stub_adapter_with(db)
+    _, added = adapter.expand_file_paths_via_graph(["caller.ts"], hops=1)
+    assert added == ["imp.ts"], f"only imports-edged neighbors should expand; got: {added}"
+
+
+def test_expander_falls_back_when_uninitialized():
+    """If the symbol index isn't available, returns inputs unchanged."""
+    adapter = RealCodeLocatorAdapter(repo_path=".")
+    # _initialized stays False; calling _ensure_initialized would raise
+    # because there's no index. The expander must catch that and fall back.
+    expanded, added = adapter.expand_file_paths_via_graph(["a.ts", "b.ts"], hops=1)
+    assert expanded == ["a.ts", "b.ts"]
+    assert added == []
+
+
+# ── Handler integration test ────────────────────────────────────────────
+
+
+class _FakeCodeGraph:
+    """Minimal code_graph wrapper for handle_preflight: overrides
+    ``expand_file_paths_via_graph`` with a hard-coded expansion, forwards
+    every other attribute to the real adapter (so ``resolve_symbols`` etc.
+    still work for the surrounding ingest/bind calls). Lets us prove the
+    handler wiring (sources_chained tag, expansion-provenance confidence)
+    without depending on a real symbol index in the test environment.
+    """
+
+    def __init__(self, real, *, expansion_for_tsx: list[str]) -> None:
+        self._real = real
+        self._expansion = expansion_for_tsx
+        self.calls: list[list[str]] = []
+
+    def expand_file_paths_via_graph(
+        self,
+        file_paths: list[str],
+        hops: int = 1,
+    ) -> tuple[list[str], list[str]]:
+        self.calls.append(list(file_paths))
+        added = [p for p in self._expansion if p not in file_paths]
+        return list(file_paths) + added, added
+
+    def __getattr__(self, name: str):
+        # Forward unknown attribute access to the real adapter so other
+        # handlers (ingest's resolve_symbols, etc.) keep working.
+        return getattr(self._real, name)
+
+
+@pytest.fixture
+def integration_env(monkeypatch, tmp_path):
+    """In-memory ledger + git-initialized repo + repo-rooted ctx; same shape
+    as ``test_alpha_contract::alpha_env`` pared down to what graph-expansion
+    needs. Requires git init because ``ensure_ledger_synced`` walks HEAD.
+    """
+    import subprocess
+
+    monkeypatch.setenv("SURREAL_URL", "memory://")
+    repo_root = tmp_path / "graph-expand-repo"
+    repo_root.mkdir()
+    subprocess.run(["git", "init", "-q", "-b", "main"], cwd=repo_root, check=True)
+    subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=repo_root, check=True)
+    subprocess.run(["git", "config", "user.name", "Test User"], cwd=repo_root, check=True)
+    # Seed the two files the tests bind / preflight against. handle_bind
+    # verifies the file exists at HEAD so we have to materialize them.
+    git_layer = repo_root / "app" / "src" / "lib" / "git"
+    git_layer.mkdir(parents=True)
+    (git_layer / "reorder.ts").write_text(
+        "// stub for graph-expansion test\nexport function reorder() { return 0 }\n"
+    )
+    ui_layer = repo_root / "app" / "src" / "ui" / "multi-commit-operation"
+    ui_layer.mkdir(parents=True)
+    (ui_layer / "reorder.tsx").write_text(
+        "// stub for graph-expansion test\nexport class Reorder {}\n"
+    )
+    subprocess.run(["git", "add", "."], cwd=repo_root, check=True)
+    subprocess.run(
+        ["git", "-c", "commit.gpgsign=false", "commit", "-q", "-m", "seed"],
+        cwd=repo_root,
+        check=True,
+    )
+
+    monkeypatch.setenv("REPO_PATH", str(repo_root))
+    monkeypatch.chdir(repo_root)
+    reset_ledger_singleton()
+    ctx = BicameralContext.from_env()
+    yield ctx
+    reset_ledger_singleton()
+
+
+@pytest.mark.asyncio
+async def test_preflight_surfaces_via_graph_expansion(integration_env, monkeypatch):
+    """Caller passes a UI-layer file; the decision is bound to a git-layer
+    file 1 hop away; preflight surfaces it via expansion with
+    ``confidence=0.7`` and tags ``sources_chained`` accordingly.
+    """
+    import dataclasses
+
+    monkeypatch.setenv("BICAMERAL_GUIDED_MODE", "1")
+    # Stub code_graph: when caller passes the UI file, expansion adds the
+    # git-layer file (where the bind lives). BicameralContext is a frozen
+    # dataclass; clone with dataclasses.replace to swap in the fake.
+    base = BicameralContext.from_env()
+    ctx = dataclasses.replace(
+        base,
+        code_graph=_FakeCodeGraph(
+            base.code_graph,
+            expansion_for_tsx=["app/src/lib/git/reorder.ts"],
+        ),
+    )
+
+    ingest_resp = await handle_ingest(
+        ctx,
+        _build_ingest_payload("Drag-to-reorder commits via the git-layer reorder helper."),
+    )
+    decision_id = ingest_resp.pending_grounding_decisions[0]["decision_id"]
+    bind_resp = await handle_bind(
+        ctx,
+        bindings=[
+            {
+                "decision_id": decision_id,
+                "file_path": "app/src/lib/git/reorder.ts",
+                "symbol_name": "reorder",
+                "start_line": 10,
+                "end_line": 80,
+            }
+        ],
+    )
+    assert bind_resp.bindings[0].error is None
+
+    pf_resp = await handle_preflight(
+        ctx,
+        topic="refactor the reorder UI to use a text-editor flow",
+        file_paths=["app/src/ui/multi-commit-operation/reorder.tsx"],
+    )
+
+    # The bound decision must surface even though caller passed the UI file.
+    decision_ids = [d.decision_id for d in pf_resp.decisions]
+    assert decision_id in decision_ids, (
+        f"bound decision {decision_id} must surface via 1-hop expansion; "
+        f"got: {decision_ids}; sources={pf_resp.sources_chained}"
+    )
+
+    # And it should be marked as expansion-provenance, not direct.
+    # `decisions` on PreflightResponse is BriefDecision (no confidence field);
+    # the confidence lives on the underlying DecisionMatch via the region
+    # lookup. The signal we can assert end-to-end is sources_chained.
+    assert "region" in pf_resp.sources_chained
+    assert "graph" in pf_resp.sources_chained, (
+        f"expected 'graph' in sources_chained when graph "
+        f"expansion produced extra hits; got: {pf_resp.sources_chained}"
+    )
+
+
+@pytest.mark.asyncio
+async def test_preflight_does_not_tag_expanded_when_direct_pin_alone(integration_env, monkeypatch):
+    """When caller passes the bound file directly, expansion may add neighbors
+    but the decision is reached via a direct pin — `sources_chained` should
+    contain `region` but NOT `graph` (the existing decision
+    is direct, not expanded).
+    """
+    import dataclasses
+
+    monkeypatch.setenv("BICAMERAL_GUIDED_MODE", "1")
+    # Expander returns no extra paths when the caller already passed the
+    # bound file directly (simulates a clean discovery).
+    base = BicameralContext.from_env()
+    ctx = dataclasses.replace(
+        base,
+        code_graph=_FakeCodeGraph(base.code_graph, expansion_for_tsx=[]),
+    )
+
+    ingest_resp = await handle_ingest(ctx, _build_ingest_payload("Direct-pin baseline."))
+    decision_id = ingest_resp.pending_grounding_decisions[0]["decision_id"]
+    await handle_bind(
+        ctx,
+        bindings=[
+            {
+                "decision_id": decision_id,
+                "file_path": "app/src/lib/git/reorder.ts",
+                "symbol_name": "reorder",
+                "start_line": 10,
+                "end_line": 80,
+            }
+        ],
+    )
+
+    pf_resp = await handle_preflight(
+        ctx,
+        topic="edit reorder",
+        file_paths=["app/src/lib/git/reorder.ts"],
+    )
+
+    decision_ids = [d.decision_id for d in pf_resp.decisions]
+    assert decision_id in decision_ids
+    assert "region" in pf_resp.sources_chained
+    assert "graph" not in pf_resp.sources_chained, (
+        f"direct pin alone must not tag 'graph'; got: {pf_resp.sources_chained}"
+    )
diff --git a/tests/test_preflight_hook.py b/tests/test_preflight_hook.py
new file mode 100644
index 00000000..fe76eb01
--- /dev/null
+++ b/tests/test_preflight_hook.py
@@ -0,0 +1,122 @@
+"""Functionality tests for scripts/hooks/preflight_reminder.py.
+
+The hook is invoked as a subprocess by Claude Code. Tests run it the
+same way to exercise stdin/stdout exactly as production does.
+
+Claude Code 2.x requires UserPromptSubmit hook output shaped as
+``{"hookSpecificOutput": {"hookEventName": "UserPromptSubmit",
+"additionalContext": "..."}}``. The legacy top-level
+``{"additionalContext": ...}`` shape is silently dropped by the CLI,
+so these tests assert against the nested shape — anything else is a
+broken contract regardless of whether the hook process exits cleanly.
+"""
+
+from __future__ import annotations
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+HOOK_SCRIPT = REPO_ROOT / "scripts" / "hooks" / "preflight_reminder.py"
+
+
+def _run_hook(stdin_text: str) -> tuple[int, str, str]:
+    """Run the hook script in a child interpreter, feeding it ``stdin_text``.
+
+    Returns the child's ``(returncode, stdout, stderr)``; both streams are
+    captured as text and the child is given at most 10 seconds to finish.
+    """
+    argv = [sys.executable, str(HOOK_SCRIPT)]
+    # Same interpreter as the test run, so no dependency on a `python3` on PATH.
+    done = subprocess.run(argv, input=stdin_text, capture_output=True, text=True, timeout=10)
+    return done.returncode, done.stdout, done.stderr
+
+
+def _hook_output(parsed: dict) -> dict:
+    """Return ``parsed["hookSpecificOutput"]`` once the envelope shape is verified."""
+    key = "hookSpecificOutput"
+    assert key in parsed, (
+        f"hook must emit hookSpecificOutput envelope (Claude Code 2.x contract); got {parsed!r}"
+    )
+    assert parsed[key].get("hookEventName") == "UserPromptSubmit"
+    return parsed[key]
+
+
+def test_emits_additional_context_on_match():
+    """An implementation-style prompt yields the preflight reminder as additionalContext."""
+    stdin_json = json.dumps({"prompt": "Please refactor the rate limiter to sliding window."})
+    rc, out, _ = _run_hook(stdin_json)
+    assert rc == 0
+    envelope = _hook_output(json.loads(out))
+    assert "additionalContext" in envelope
+    for marker in ("<system-reminder>", "bicameral.preflight"):
+        assert marker in envelope["additionalContext"]
+
+
+def test_emits_empty_on_no_match():
+    """A prompt the hook should skip exits 0 without a hookSpecificOutput envelope."""
+    rc, out, _ = _run_hook(json.dumps({"prompt": "fix the typo in README"}))
+    assert rc == 0
+    # Blank stdout counts as an empty response; anything else must be JSON.
+    response = {} if not out.strip() else json.loads(out)
+    assert "hookSpecificOutput" not in response
+
+
+def test_handles_malformed_stdin():
+    """Garbage on stdin must still exit 0 and must not produce a reminder envelope."""
+    rc, out, _ = _run_hook("this is not JSON at all {[}")
+    assert rc == 0
+    # Empty stdout is acceptable; non-empty stdout must parse and carry no envelope.
+    response = json.loads(out) if out.strip() else {}
+    assert "hookSpecificOutput" not in response
+
+
+def test_idempotent_on_double_fire():
+    """Running the hook twice on one prompt gives the same exit code and stdout."""
+    stdin_json = json.dumps({"prompt": "implement the OAuth callback for Google Calendar"})
+    first_rc, first_out, _ = _run_hook(stdin_json)
+    second_rc, second_out, _ = _run_hook(stdin_json)
+    assert (first_rc, second_rc) == (0, 0)
+    assert first_out == second_out
+
+
+def test_handles_natural_contradiction_prompt():
+    """Issue #146 acceptance: the literal Flow 2 prompt triggers the reminder."""
+    flow2_prompt = (
+        "I know the roadmap said drag-and-drop to reorder commits, "
+        "but actually we're switching to a text-editor approach. "
+        "Please update cherry-pick.ts and reorder.ts."
+    )
+    rc, out, _ = _run_hook(json.dumps({"prompt": flow2_prompt}))
+    assert rc == 0
+    envelope = _hook_output(json.loads(out))
+    # The reminder text has to name the tool the agent is expected to call.
+    assert "additionalContext" in envelope
+    reminder = envelope["additionalContext"]
+    assert "bicameral.preflight" in reminder
+
+
+def test_reminder_gates_writes_not_discovery():
+    """Pin the reminder's posture: discover first, preflight before writing.
+
+    The agent has to turn "the X feature" into concrete file paths with
+    Read/Grep/Glob before it can call preflight usefully, so the reminder
+    may only gate WRITE ops. An earlier wording ("call preflight before any
+    file-inspection tool") blocked that discovery; this test keeps it from
+    creeping back in.
+    """
+    stdin_json = json.dumps({"prompt": "refactor the reorder feature to a text-editor flow"})
+    rc, out, _ = _run_hook(stdin_json)
+    assert rc == 0
+    reminder = _hook_output(json.loads(out))["additionalContext"]
+    # Required wording: discovery first, write ops gated, file_paths requested.
+    required = ("Read-only discovery FIRST", "BEFORE any write op", "Edit, Write", "file_paths")
+    for phrase in required:
+        assert phrase in reminder
+
+    # Forbidden wording: the old shape that put preflight ahead of inspection.
+    forbidden = ("before any file-inspection tool", "Before invoking any file-inspection tool")
+    for phrase in forbidden:
+        assert phrase not in reminder
diff --git a/tests/test_preflight_intent.py b/tests/test_preflight_intent.py
new file mode 100644
index 00000000..4cbc4443
--- /dev/null
+++ b/tests/test_preflight_intent.py
@@ -0,0 +1,70 @@
+"""Functionality tests for scripts.hooks.preflight_intent."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(REPO_ROOT))
+
+from scripts.hooks.preflight_intent import (  # noqa: E402
+    IMPLEMENTATION_VERBS,
+    INDIRECT_INTENT_PHRASES,
+    SKIP_PATTERNS,
+    should_fire_preflight,
+)
+
+
+def test_fires_on_implementation_verbs():
+    """Each verb in IMPLEMENTATION_VERBS, placed in a plain request, must fire."""
+    sentences = {verb: f"Please {verb} the rate limiter for me." for verb in IMPLEMENTATION_VERBS}
+    for verb, sentence in sentences.items():
+        assert should_fire_preflight(sentence), f"verb {verb!r} did not fire"
+
+
+def test_skips_on_doc_only_prompts():
+    """Typo fixes, version bumps and how-does-it-work questions must not fire."""
+    for prompt in (
+        "fix the typo in the README",
+        "bump lodash to 4.17.21",
+        "how does the rate limiter work?",
+    ):
+        fired = should_fire_preflight(prompt)
+        assert not fired, f"skip-prompt {prompt!r} fired"
+
+
+def test_fires_on_indirect_intent():
+    """A question about HOW to build something still signals intent and must fire."""
+    for prompt in (
+        "how should I implement the retry logic?",
+        "how do I build the payment flow?",
+        "what's the best way to add idempotency keys?",
+    ):
+        fired = should_fire_preflight(prompt)
+        assert fired, f"indirect prompt {prompt!r} did not fire"
+
+
+def test_data_is_loadable():
+    """Container types, the 28-verb floor, and non-empty string entries all hold."""
+    assert isinstance(IMPLEMENTATION_VERBS, frozenset)
+    assert isinstance(INDIRECT_INTENT_PHRASES, tuple)
+    assert isinstance(SKIP_PATTERNS, tuple)
+    assert len(IMPLEMENTATION_VERBS) >= 28
+    for entry in (*IMPLEMENTATION_VERBS, *INDIRECT_INTENT_PHRASES):
+        assert isinstance(entry, str) and entry
+
+
+def test_natural_contradiction_prompt():
+    """Issue #146: the literal Flow 2 prompt must be classified as fire-worthy."""
+    sentences = (
+        "I know the roadmap said drag-and-drop to reorder commits,",
+        "but actually we're switching to a text-editor approach.",
+        "Please update cherry-pick.ts and reorder.ts.",
+    )
+    assert should_fire_preflight(" ".join(sentences))
+
+
+def test_empty_prompt_does_not_fire():
+    """Empty and whitespace-only prompts are never fire-worthy."""
+    assert not any(should_fire_preflight(blank) for blank in ("", "   \n\t"))
diff --git a/tests/test_project_decision_status.py b/tests/test_project_decision_status.py
index 00674f97..aaac34d0 100644
--- a/tests/test_project_decision_status.py
+++ b/tests/test_project_decision_status.py
@@ -9,6 +9,7 @@
 
 Closes the gap v0.6.1's session-start banner infra couldn't close on its own.
 """
+
 from __future__ import annotations
 
 import pytest
@@ -33,6 +34,7 @@ async def _seed_decision(client: LedgerClient, description: str = "test decision
     # canonical_id has a UNIQUE index — derive a stable unique value from the
     # description so multiple decisions in one test don't collide.
     import hashlib
+
     canonical = hashlib.sha256(description.encode()).hexdigest()[:16]
     rows = await client.query(
         "CREATE decision SET description = $d, canonical_id = $c, source_type = 'manual'",
diff --git a/tests/test_reset.py b/tests/test_reset.py
index dbd607b8..0bef3663 100644
--- a/tests/test_reset.py
+++ b/tests/test_reset.py
@@ -17,7 +17,6 @@
 from context import BicameralContext
 from handlers.reset import handle_reset
 
-
 # ── Helpers ─────────────────────────────────────────────────────────
 
 
@@ -53,7 +52,10 @@ def _payload_for(repo: str, source_type: str, source_ref: str) -> dict:
 
 
 async def _seed_repo_with_cursors(
-    ledger, repo: str, count: int = 3, source_type: str = "slack",
+    ledger,
+    repo: str,
+    count: int = 3,
+    source_type: str = "slack",
 ) -> None:
     """Seed N source_cursor rows for a repo by upserting them directly."""
     for i in range(count):
@@ -72,6 +74,7 @@ def _ctx(repo_path: str = "test-repo") -> BicameralContext:
     are left as whatever from_env builds — reset doesn't use them.
     """
     import os
+
     os.environ["REPO_PATH"] = repo_path
     return BicameralContext.from_env()
 
@@ -139,9 +142,7 @@ async def test_reset_confirm_actually_wipes(monkeypatch, surreal_url):
     for d in post_decisions:
         # description-based check — the seeded decisions had distinctive
         # 'decision from msg_N' descriptions
-        assert "decision from msg_" not in d.get("description", ""), (
-            f"wipe missed an intent: {d}"
-        )
+        assert "decision from msg_" not in d.get("description", ""), f"wipe missed an intent: {d}"
 
     reset_ledger_singleton()
 
diff --git a/tests/test_resolve_compliance.py b/tests/test_resolve_compliance.py
index 5583cb48..21758dcb 100644
--- a/tests/test_resolve_compliance.py
+++ b/tests/test_resolve_compliance.py
@@ -11,6 +11,7 @@
   link_commit + resolve flow on a tmp git repo.
 - not_relevant verdict prunes the binds_to edge + audit row kept
 """
+
 from __future__ import annotations
 
 import subprocess
@@ -72,8 +73,7 @@ async def _seed_region(
     symbol: str = "do_thing",
 ) -> str:
     rows = await client.query(
-        "CREATE code_region SET file_path = $f, symbol_name = $s, "
-        "start_line = 1, end_line = 10",
+        "CREATE code_region SET file_path = $f, symbol_name = $s, start_line = 1, end_line = 10",
         {"f": file_path, "s": symbol},
     )
     return str(rows[0]["id"])
@@ -100,7 +100,9 @@ async def test_resolve_compliance_writes_compliance_check_row():
         )
 
         resp = await handle_resolve_compliance(
-            ctx, phase="ingest", verdicts=[verdict],
+            ctx,
+            phase="ingest",
+            verdicts=[verdict],
         )
 
         assert resp.phase == "ingest"
@@ -246,7 +248,10 @@ async def test_resolve_compliance_mixed_batch_partitions_correctly():
         )
 
         resp = await handle_resolve_compliance(
-            ctx, phase="drift", verdicts=[good, bad], commit_hash="abc123",
+            ctx,
+            phase="drift",
+            verdicts=[good, bad],
+            commit_hash="abc123",
         )
 
         assert len(resp.accepted) == 1
@@ -272,9 +277,7 @@ async def test_resolve_compliance_accepts_all_phase_values():
         decision_id = await _seed_decision(client)
         region_id = await _seed_region(client)
 
-        for i, phase in enumerate(
-            ("ingest", "drift", "regrounding", "supersession", "divergence")
-        ):
+        for i, phase in enumerate(("ingest", "drift", "regrounding", "supersession", "divergence")):
             v = ComplianceVerdict(
                 decision_id=decision_id,
                 region_id=region_id,
@@ -296,7 +299,9 @@ async def test_resolve_compliance_rejects_unknown_phase():
     try:
         with pytest.raises(ValueError, match="Unknown phase"):
             await handle_resolve_compliance(
-                ctx, phase="speculation", verdicts=[],
+                ctx,
+                phase="speculation",
+                verdicts=[],
             )
     finally:
         await _client.close()
@@ -322,7 +327,9 @@ async def test_resolve_compliance_accepts_dict_verdicts():
             "explanation": "from JSON",
         }
         resp = await handle_resolve_compliance(
-            ctx, phase="ingest", verdicts=[verdict_dict],
+            ctx,
+            phase="ingest",
+            verdicts=[verdict_dict],
         )
         assert len(resp.accepted) == 1
     finally:
@@ -354,7 +361,9 @@ async def test_not_relevant_verdict_prunes_binds_to_edge():
             explanation="this region is unrelated",
         )
         resp = await handle_resolve_compliance(
-            ctx, phase="ingest", verdicts=[verdict],
+            ctx,
+            phase="ingest",
+            verdicts=[verdict],
         )
         assert len(resp.accepted) == 1
 
@@ -376,7 +385,11 @@ async def test_not_relevant_verdict_prunes_binds_to_edge():
 
 def _git(cwd: Path, *args: str) -> str:
     result = subprocess.run(
-        ["git", *args], cwd=cwd, capture_output=True, text=True, check=True,
+        ["git", *args],
+        cwd=cwd,
+        capture_output=True,
+        text=True,
+        check=True,
     )
     return result.stdout.strip()
 
@@ -386,12 +399,14 @@ def _seed_repo(root: Path) -> None:
     _git(root, "init", "-q", "-b", "main")
     _git(root, "config", "user.email", "test@example.com")
     _git(root, "config", "user.name", "Test")
-    (root / "pricing.py").write_text(dedent("""
+    (root / "pricing.py").write_text(
+        dedent("""
         def calculate_discount(order_total):
             if order_total >= 100:
                 return order_total * 0.10
             return 0
-    """).lstrip("\n"))
+    """).lstrip("\n")
+    )
     _git(root, "add", "pricing.py")
     _git(root, "-c", "commit.gpgsign=false", "commit", "-q", "-m", "seed")
 
@@ -433,13 +448,15 @@ async def test_e2e_pending_to_reflected_via_resolve(_repo_ctx):
                 },
                 "intent": "Apply 10% discount on orders of $100 or more",
                 "symbols": ["calculate_discount"],
-                "code_regions": [{
-                    "file_path": "pricing.py",
-                    "symbol": "calculate_discount",
-                    "type": "function",
-                    "start_line": 1,
-                    "end_line": 4,
-                }],
+                "code_regions": [
+                    {
+                        "file_path": "pricing.py",
+                        "symbol": "calculate_discount",
+                        "type": "function",
+                        "start_line": 1,
+                        "end_line": 4,
+                    }
+                ],
                 # Ratified signoff required for drift detection to run (v0.7+)
                 "signoff": {
                     "state": "ratified",
@@ -454,9 +471,7 @@ async def test_e2e_pending_to_reflected_via_resolve(_repo_ctx):
 
     assert ingest_resp.sync_status is not None, "ingest should populate sync_status"
     pending = ingest_resp.sync_status.pending_compliance_checks
-    assert len(pending) == 1, (
-        f"Expected one pending check from drift sweep, got {len(pending)}"
-    )
+    assert len(pending) == 1, f"Expected one pending check from drift sweep, got {len(pending)}"
 
     p = pending[0]
     assert p.decision_description == "Apply 10% discount on orders of $100 or more"
@@ -512,13 +527,15 @@ async def test_e2e_noncompliant_verdict_yields_drifted(_repo_ctx):
                 },
                 "intent": "Apply 50% discount on orders of $100 or more",
                 "symbols": ["calculate_discount"],
-                "code_regions": [{
-                    "file_path": "pricing.py",
-                    "symbol": "calculate_discount",
-                    "type": "function",
-                    "start_line": 1,
-                    "end_line": 4,
-                }],
+                "code_regions": [
+                    {
+                        "file_path": "pricing.py",
+                        "symbol": "calculate_discount",
+                        "type": "function",
+                        "start_line": 1,
+                        "end_line": 4,
+                    }
+                ],
                 # Ratified signoff required for drift detection to run (v0.7+)
                 "signoff": {
                     "state": "ratified",
@@ -553,7 +570,10 @@ async def test_e2e_noncompliant_verdict_yields_drifted(_repo_ctx):
     assert len(drifted) == 1
     inner = getattr(ledger, "_inner", ledger)
     cached = await get_compliance_verdict(
-        inner._client, p.decision_id, p.region_id, p.content_hash,
+        inner._client,
+        p.decision_id,
+        p.region_id,
+        p.content_hash,
     )
     assert cached is not None
     assert cached["verdict"] == "drifted"
diff --git a/tests/test_schema_persistence.py b/tests/test_schema_persistence.py
index ec0fc854..ce81c6b4 100644
--- a/tests/test_schema_persistence.py
+++ b/tests/test_schema_persistence.py
@@ -80,6 +80,7 @@ async def test_destructive_migration_blocked(tmp_path):
     allow_destructive=False is safe when there are no destructive steps.
     """
     from ledger.schema import DESTRUCTIVE_MIGRATIONS
+
     url = f"surrealkv://{tmp_path / 'ledger.db'}"
     client = LedgerClient(url=url, ns="bicameral", db="ledger")
     await client.connect()
diff --git a/tests/test_session_end_hook_drift.py b/tests/test_session_end_hook_drift.py
new file mode 100644
index 00000000..a850e1fb
--- /dev/null
+++ b/tests/test_session_end_hook_drift.py
@@ -0,0 +1,87 @@
+"""Functionality tests for SessionEnd hook drift fix per
+plan-147-flow4-ledger-validation.md Phase 2.
+
+Verifies the canonical hook command shape lands in:
+  - .claude/settings.json (the deployed hook)
+  - setup_wizard._BICAMERAL_SESSION_END_COMMAND (the source of truth for
+    fresh installs)
+
+The canonical command per skills/bicameral-capture-corrections/SKILL.md:207:
+
+  [ -d .bicameral ] && [ -z "$BICAMERAL_SESSION_END_RUNNING" ] && \
+    BICAMERAL_SESSION_END_RUNNING=1 \
+    claude -p '/bicameral:capture-corrections --auto-ingest' || true
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(REPO_ROOT))
+
+
+CANONICAL_COMMAND = (
+    '[ -d .bicameral ] && [ -z "$BICAMERAL_SESSION_END_RUNNING" ] && '  # dir exists, flag unset
+    "BICAMERAL_SESSION_END_RUNNING=1 "  # flag set in the environment of the claude command
+    "claude -p '/bicameral:capture-corrections --auto-ingest' || true"  # overall status is 0
+)
+
+
+def _extract_session_end_command() -> str:
+    """Return the command of the first hook in the first SessionEnd entry of settings.json."""
+    settings_path = REPO_ROOT / ".claude" / "settings.json"
+    with settings_path.open(encoding="utf-8") as fh:
+        return json.load(fh)["hooks"]["SessionEnd"][0]["hooks"][0]["command"]
+
+
+def test_settings_json_session_end_has_reentrancy_guard():
+    """The deployed SessionEnd command both tests and sets the re-entrancy env var."""
+    deployed = _extract_session_end_command()
+    for fragment in ('[ -z "$BICAMERAL_SESSION_END_RUNNING" ]', "BICAMERAL_SESSION_END_RUNNING=1"):
+        assert fragment in deployed
+
+
+def test_settings_json_session_end_passes_auto_ingest_flag():
+    """The deployed SessionEnd command carries the ``--auto-ingest`` flag."""
+    # A plain substring check: the flag's position in the command is not constrained here.
+    assert "--auto-ingest" in _extract_session_end_command()
+
+
+def test_setup_wizard_renders_canonical_session_end_hook():
+    """setup_wizard's SessionEnd constant equals CANONICAL_COMMAND exactly;
+    guards against the wizard drifting from what SKILL.md prescribes."""
+    import setup_wizard
+
+    wizard_command = setup_wizard._BICAMERAL_SESSION_END_COMMAND
+    assert wizard_command == CANONICAL_COMMAND
+
+
+def test_build_session_end_command_no_args_matches_canonical():
+    """Called with no arguments, the builder returns CANONICAL_COMMAND exactly,
+    so default installs get the same hook the constant describes."""
+    import setup_wizard
+
+    default_command = setup_wizard._build_session_end_command()
+    assert default_command == CANONICAL_COMMAND
+
+
+def test_build_session_end_command_with_mcp_config_inserts_flags():
+    """With ``mcp_config_path`` set, the built command gains ``--mcp-config <path>``
+    and ``--strict-mcp-config`` while keeping the guard and ``--auto-ingest``.
+
+    Intended for the test harness, so the spawned process writes to a test
+    ledger rather than the user's default one."""
+    import setup_wizard
+
+    build = setup_wizard._build_session_end_command
+    plain = build(mcp_config_path="/tmp/x/mcp.json")
+    for fragment in ("--mcp-config /tmp/x/mcp.json", "--strict-mcp-config"):
+        assert fragment in plain
+    # The re-entrancy guard and --auto-ingest must survive the extra flags.
+    for fragment in ('[ -z "$BICAMERAL_SESSION_END_RUNNING" ]', "--auto-ingest"):
+        assert fragment in plain
+    # A path containing a space has to come back single-quoted.
+    assert "'/tmp/with space/mcp.json'" in build(mcp_config_path="/tmp/with space/mcp.json")
diff --git a/tests/test_sync_middleware.py b/tests/test_sync_middleware.py
index 111cd614..410bf87c 100644
--- a/tests/test_sync_middleware.py
+++ b/tests/test_sync_middleware.py
@@ -1,7 +1,8 @@
 """Tests for sync_middleware — session-start banner and ledger catch-up (v0.6.1)."""
+
 from __future__ import annotations
 
-from datetime import datetime, timedelta, timezone
+from datetime import UTC, datetime, timedelta, timezone
 from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock, patch
 
@@ -47,9 +48,13 @@ def _ungrounded(decision_id="decision:2", description="Billing uses Stripe", sou
     }
 
 
-def _proposal(decision_id="decision:3", description="Rate limit is 100 req/s",
-              source_ref="sprint-notes", days_old=15):
-    created_at = (datetime.now(timezone.utc) - timedelta(days=days_old)).isoformat()
+def _proposal(
+    decision_id="decision:3",
+    description="Rate limit is 100 req/s",
+    source_ref="sprint-notes",
+    days_old=15,
+):
+    created_at = (datetime.now(UTC) - timedelta(days=days_old)).isoformat()
     return {
         "decision_id": decision_id,
         "description": description,
@@ -99,25 +104,28 @@ async def test_banner_includes_ungrounded_decisions():
 async def test_banner_queries_both_drifted_and_ungrounded_statuses():
     ctx = _make_ctx(open_rows=[_drifted()])
     await get_session_start_banner(ctx)
-    ctx.ledger.get_decisions_by_status.assert_called_once_with(["drifted", "ungrounded", "context_pending"])
+    ctx.ledger.get_decisions_by_status.assert_called_once_with(
+        ["drifted", "ungrounded", "context_pending"]
+    )
 
 
 @pytest.mark.asyncio
 async def test_banner_truncates_at_10_items_with_drifted_prioritized():
     # 12 open items: 3 drifted + 9 ungrounded. Truncated view should keep
     # all 3 drifted first, then fill with ungrounded up to the 10-item cap.
-    rows = [_drifted(decision_id=f"decision:d{i}") for i in range(3)] + \
-           [_ungrounded(decision_id=f"decision:u{i}") for i in range(9)]
+    rows = [_drifted(decision_id=f"decision:d{i}") for i in range(3)] + [
+        _ungrounded(decision_id=f"decision:u{i}") for i in range(9)
+    ]
     ctx = _make_ctx(open_rows=rows)
     banner = await get_session_start_banner(ctx)
     assert banner is not None
-    assert banner.drifted_count == 3        # full count, not truncated
+    assert banner.drifted_count == 3  # full count, not truncated
     assert banner.ungrounded_count == 9
-    assert len(banner.items) == 10          # list is capped
+    assert len(banner.items) == 10  # list is capped
     assert banner.truncated is True
     # All 3 drifted must be present in the truncated view
     assert sum(1 for i in banner.items if i["status"] == "drifted") == 3
-    assert f"top 10" in banner.message
+    assert "top 10" in banner.message
 
 
 @pytest.mark.asyncio
@@ -232,6 +240,7 @@ def _reset_locks():
     """Drop the per-repo lock registry before and after each test so lock
     identity is deterministic across tests in the same process."""
     from handlers.sync_middleware import _reset_repo_locks_for_tests
+
     _reset_repo_locks_for_tests()
     yield
     _reset_repo_locks_for_tests()
@@ -251,6 +260,7 @@ async def test_repo_write_barrier_serializes_same_repo(_reset_locks):
     bind call cannot observe the ledger while the first is mid-write.
     """
     import asyncio
+
     from handlers.sync_middleware import repo_write_barrier
 
     events: list[str] = []
@@ -272,6 +282,7 @@ async def task(name: str, hold_ms: int):
 async def test_repo_write_barrier_allows_different_repos_concurrently(_reset_locks):
     """Different repos use different locks and MUST run in parallel."""
     import asyncio
+
     from handlers.sync_middleware import repo_write_barrier
 
     events: list[str] = []
@@ -295,6 +306,7 @@ async def task(name: str, repo: str):
 async def test_repo_write_barrier_releases_on_exception(_reset_locks):
     """If the body raises, the lock must still release so the next caller proceeds."""
     import asyncio
+
     from handlers.sync_middleware import repo_write_barrier
 
     ctx = _barrier_ctx("/repo/a")
@@ -315,6 +327,7 @@ async def reacquire():
 async def test_repo_write_barrier_falls_back_when_repo_path_missing(_reset_locks):
     """Missing ctx.repo_path falls back to a default key and still serializes."""
     import asyncio
+
     from handlers.sync_middleware import repo_write_barrier
 
     class _Bare:
@@ -343,6 +356,7 @@ async def task(name: str):
 async def test_repo_write_barrier_reports_held_ms(_reset_locks):
     """BarrierTiming.held_ms is populated on exit and is non-negative."""
     import asyncio
+
     from handlers.sync_middleware import repo_write_barrier
 
     ctx = _barrier_ctx("/repo/a")
diff --git a/tests/test_team_event_replay.py b/tests/test_team_event_replay.py
index 78647190..ca8dbe78 100644
--- a/tests/test_team_event_replay.py
+++ b/tests/test_team_event_replay.py
@@ -92,9 +92,7 @@ async def test_ratify_event_roundtrip(tmp_path: Path) -> None:
     }
     await team_a.apply_ratify(decision_id_a, signoff)
 
-    rows = await inner_a._client.query(
-        f"SELECT signoff FROM {decision_id_a} LIMIT 1"
-    )
+    rows = await inner_a._client.query(f"SELECT signoff FROM {decision_id_a} LIMIT 1")
     assert rows and rows[0]["signoff"]["state"] == "ratified"
 
     # Fresh adapter, same JSONL log, fresh watermark — replay from 0.
@@ -104,13 +102,10 @@ async def test_ratify_event_roundtrip(tmp_path: Path) -> None:
 
     decision_id_b = await find_decision_by_canonical_id(inner_b._client, canonical)
     assert decision_id_b, "ingest event did not replay (no row for canonical_id)"
-    rows_b = await inner_b._client.query(
-        f"SELECT signoff FROM {decision_id_b} LIMIT 1"
-    )
+    rows_b = await inner_b._client.query(f"SELECT signoff FROM {decision_id_b} LIMIT 1")
     replayed_signoff = rows_b[0].get("signoff") or {}
     assert replayed_signoff.get("state") == "ratified", (
-        "decision_ratified.completed event did not replay; "
-        f"got signoff={replayed_signoff!r}"
+        f"decision_ratified.completed event did not replay; got signoff={replayed_signoff!r}"
     )
 
 
@@ -154,8 +149,7 @@ async def test_supersede_event_roundtrip(tmp_path: Path) -> None:
     rows_b = await inner_b._client.query(f"SELECT signoff FROM {old_id_b} LIMIT 1")
     replayed = rows_b[0].get("signoff") or {}
     assert replayed.get("state") == "superseded", (
-        "decision_superseded.completed event did not replay; "
-        f"got signoff={replayed!r}"
+        f"decision_superseded.completed event did not replay; got signoff={replayed!r}"
     )
     assert replayed.get("superseded_by") == new_id_b
 
@@ -188,8 +182,6 @@ async def test_ingest_event_roundtrip_regression(tmp_path: Path) -> None:
 
     decision_id_b = await find_decision_by_canonical_id(inner_b._client, canonical)
     assert decision_id_b, "ingest.completed regression — canonical lookup failed"
-    rows = await inner_b._client.query(
-        f"SELECT description FROM {decision_id_b} LIMIT 1"
-    )
+    rows = await inner_b._client.query(f"SELECT description FROM {decision_id_b} LIMIT 1")
     assert rows, "ingest.completed regression — decision row missing after replay"
     assert "regression-intent" in str(rows[0].get("description", ""))
diff --git a/tests/test_usage_summary.py b/tests/test_usage_summary.py
index 50068abf..783a6d5b 100644
--- a/tests/test_usage_summary.py
+++ b/tests/test_usage_summary.py
@@ -11,7 +11,9 @@
 from handlers.usage_summary import handle_usage_summary
 
 
-def _ctx_with_decisions(rows: list[dict] | None = None, cc_rows: list[dict] | None = None) -> SimpleNamespace:
+def _ctx_with_decisions(
+    rows: list[dict] | None = None, cc_rows: list[dict] | None = None
+) -> SimpleNamespace:
     """Build a fake ctx whose ledger.client.query returns staged rows."""
     client = MagicMock()
     call_count = {"i": 0}
@@ -102,7 +104,9 @@ async def test_tool_call_counts_from_local_counters(
     monkeypatch.setenv("HOME", str(tmp_path))
     monkeypatch.setenv("USERPROFILE", str(tmp_path))
     import importlib
+
     import local_counters
+
     importlib.reload(local_counters)
     for _ in range(3):
         local_counters.increment("bicameral-ingest")
diff --git a/tests/test_v0410_guided_mode.py b/tests/test_v0410_guided_mode.py
index a9da9dee..8b218f64 100644
--- a/tests/test_v0410_guided_mode.py
+++ b/tests/test_v0410_guided_mode.py
@@ -36,11 +36,10 @@
     SearchDecisionsResponse,
 )
 from handlers.action_hints import (
-    generate_hints_from_findings,
     generate_hints_for_search,
+    generate_hints_from_findings,
 )
 
-
 # ── Helper factories ────────────────────────────────────────────────
 
 
@@ -119,9 +118,11 @@ def test_search_empty_matches_no_hints_in_either_mode():
 
 def test_search_drifted_match_fires_in_normal_mode_as_advisory():
     """v0.4.10: hints fire even in normal mode, just non-blocking."""
-    response = _search_response([
-        _match(intent_id="decision:1", status="drifted", file_path="src/a.ts"),
-    ])
+    response = _search_response(
+        [
+            _match(intent_id="decision:1", status="drifted", file_path="src/a.ts"),
+        ]
+    )
     hints = generate_hints_for_search(response, guided_mode=False)
     assert len(hints) == 1
     h = hints[0]
@@ -134,10 +135,12 @@ def test_search_drifted_match_fires_in_normal_mode_as_advisory():
 
 
 def test_search_drifted_match_fires_in_guided_mode_as_blocking():
-    response = _search_response([
-        _match(intent_id="decision:1", status="drifted", file_path="src/a.ts"),
-        _match(intent_id="decision:2", status="drifted", file_path="src/b.ts"),
-    ])
+    response = _search_response(
+        [
+            _match(intent_id="decision:1", status="drifted", file_path="src/a.ts"),
+            _match(intent_id="decision:2", status="drifted", file_path="src/b.ts"),
+        ]
+    )
     hints = generate_hints_for_search(response, guided_mode=True)
     review = [h for h in hints if h.kind == "review_drift"]
     assert len(review) == 1
@@ -153,9 +156,11 @@ def test_search_drifted_match_fires_in_guided_mode_as_blocking():
 
 
 def test_search_ungrounded_fires_in_both_modes():
-    response = _search_response([
-        _match(intent_id="decision:1", status="ungrounded"),
-    ])
+    response = _search_response(
+        [
+            _match(intent_id="decision:1", status="ungrounded"),
+        ]
+    )
     response.matches[0].code_regions = []
 
     advisory = generate_hints_for_search(response, guided_mode=False)
@@ -180,11 +185,13 @@ def test_search_message_tone_differs_between_modes():
 
 
 def test_search_fires_both_review_and_ground_when_mixed():
-    response = _search_response([
-        _match(intent_id="decision:1", status="drifted"),
-        _match(intent_id="decision:2", status="ungrounded"),
-        _match(intent_id="decision:3", status="reflected"),
-    ])
+    response = _search_response(
+        [
+            _match(intent_id="decision:1", status="drifted"),
+            _match(intent_id="decision:2", status="ungrounded"),
+            _match(intent_id="decision:3", status="reflected"),
+        ]
+    )
     for guided in (False, True):
         hints = generate_hints_for_search(response, guided_mode=guided)
         kinds = {h.kind for h in hints}
@@ -271,11 +278,14 @@ def test_findings_open_question_gap_fires_in_both_modes():
 
 def test_findings_fires_all_three_kinds_when_everything_present():
     drift = [_brief_decision(intent_id="a", status="drifted")]
-    divergences = [BriefDivergence(
-        symbol="X", file_path="src/x.ts",
-        conflicting_decisions=[_brief_decision(), _brief_decision()],
-        summary="conflict",
-    )]
+    divergences = [
+        BriefDivergence(
+            symbol="X",
+            file_path="src/x.ts",
+            conflicting_decisions=[_brief_decision(), _brief_decision()],
+            summary="conflict",
+        )
+    ]
     gaps = [BriefGap(description="open q", hint="open-question phrasing")]
     for guided in (False, True):
         hints = generate_hints_from_findings(divergences, drift, gaps, guided_mode=guided)
@@ -296,22 +306,26 @@ def test_action_hints_default_to_empty_list():
 # ── Context flag parsing ────────────────────────────────────────────
 
 
-@pytest.mark.parametrize("env_val,expected", [
-    ("1", True),
-    ("true", True),
-    ("True", True),
-    ("TRUE", True),
-    ("yes", True),
-    ("on", True),
-    ("0", False),
-    ("false", False),
-    ("no", False),
-    ("off", False),
-    ("maybe", False),  # unrecognized → falls through to config file → false
-])
+@pytest.mark.parametrize(
+    "env_val,expected",
+    [
+        ("1", True),
+        ("true", True),
+        ("True", True),
+        ("TRUE", True),
+        ("yes", True),
+        ("on", True),
+        ("0", False),
+        ("false", False),
+        ("no", False),
+        ("off", False),
+        ("maybe", False),  # unrecognized → falls through to config file → false
+    ],
+)
 def test_guided_mode_env_truthy_set(env_val: str, expected: bool):
     """Truthy/falsy env values map correctly via the helper sets."""
-    from context import _GUIDED_MODE_TRUTHY, _GUIDED_MODE_FALSY
+    from context import _GUIDED_MODE_FALSY, _GUIDED_MODE_TRUTHY
+
     is_truthy = env_val.strip().lower() in _GUIDED_MODE_TRUTHY
     if expected:
         assert is_truthy
@@ -324,6 +338,7 @@ def test_guided_mode_env_truthy_set(env_val: str, expected: bool):
 def test_read_guided_mode_falls_back_to_false_when_no_config(tmp_path, monkeypatch):
     monkeypatch.delenv("BICAMERAL_GUIDED_MODE", raising=False)
     from context import _read_guided_mode
+
     assert _read_guided_mode(str(tmp_path)) is False
 
 
@@ -333,6 +348,7 @@ def test_read_guided_mode_reads_config_yaml_true(tmp_path, monkeypatch):
     cfg_dir.mkdir()
     (cfg_dir / "config.yaml").write_text("mode: solo\nguided: true\n")
     from context import _read_guided_mode
+
     assert _read_guided_mode(str(tmp_path)) is True
 
 
@@ -342,6 +358,7 @@ def test_read_guided_mode_reads_config_yaml_false(tmp_path, monkeypatch):
     cfg_dir.mkdir()
     (cfg_dir / "config.yaml").write_text("mode: solo\nguided: false\n")
     from context import _read_guided_mode
+
     assert _read_guided_mode(str(tmp_path)) is False
 
 
@@ -352,6 +369,7 @@ def test_env_var_overrides_config_file(tmp_path, monkeypatch):
     (cfg_dir / "config.yaml").write_text("mode: solo\nguided: false\n")
     monkeypatch.setenv("BICAMERAL_GUIDED_MODE", "1")
     from context import _read_guided_mode
+
     assert _read_guided_mode(str(tmp_path)) is True
 
 
@@ -362,4 +380,5 @@ def test_env_var_can_force_off_against_config_file(tmp_path, monkeypatch):
     (cfg_dir / "config.yaml").write_text("mode: solo\nguided: true\n")
     monkeypatch.setenv("BICAMERAL_GUIDED_MODE", "0")
     from context import _read_guided_mode
+
     assert _read_guided_mode(str(tmp_path)) is False
diff --git a/tests/test_v0411_latent_drift.py b/tests/test_v0411_latent_drift.py
index 04a8ae3a..0836c3f0 100644
--- a/tests/test_v0411_latent_drift.py
+++ b/tests/test_v0411_latent_drift.py
@@ -30,7 +30,6 @@
 from handlers.link_commit import handle_link_commit
 from ledger.status import get_changed_files, get_changed_files_in_range
 
-
 # ── Helpers ─────────────────────────────────────────────────────────
 
 
@@ -49,18 +48,24 @@ def _seed_repo(repo_root: Path) -> str:
     _git(repo_root, "init", "-q", "-b", "main")
     _git(repo_root, "config", "user.email", "t@e.com")
     _git(repo_root, "config", "user.name", "t")
-    (repo_root / "pricing.py").write_text(dedent("""
+    (repo_root / "pricing.py").write_text(
+        dedent("""
         def calculate_discount(order_total):
             if order_total >= 100:
                 return order_total * 0.10
             return 0
-    """).strip() + "\n")
-    (repo_root / "auth.py").write_text(dedent("""
+    """).strip()
+        + "\n"
+    )
+    (repo_root / "auth.py").write_text(
+        dedent("""
         def validate_token(token):
             if not token:
                 return False
             return len(token) > 10
-    """).strip() + "\n")
+    """).strip()
+        + "\n"
+    )
     _git(repo_root, "add", ".")
     _git(repo_root, "-c", "commit.gpgsign=false", "commit", "-q", "-m", "seed")
     return _git(repo_root, "rev-parse", "HEAD")
@@ -167,12 +172,14 @@ async def test_second_sync_after_gap_uses_range_diff(_isolated_ledger):
 
     # Two commits, two different files
     sha2 = _commit_edit(
-        repo_root, "pricing.py",
+        repo_root,
+        "pricing.py",
         "def calculate_discount(t):\n    return t * 0.5",
         "rewrite pricing",
     )
     sha3 = _commit_edit(
-        repo_root, "auth.py",
+        repo_root,
+        "auth.py",
         "def validate_token(t):\n    return False",
         "rewrite auth",
     )
@@ -186,9 +193,7 @@ async def test_second_sync_after_gap_uses_range_diff(_isolated_ledger):
     ctx2 = _ctx()
     r2 = await handle_link_commit(ctx2, "HEAD")
 
-    assert r2.sweep_scope == "range_diff", (
-        f"Expected range_diff after gap, got {r2.sweep_scope}"
-    )
+    assert r2.sweep_scope == "range_diff", f"Expected range_diff after gap, got {r2.sweep_scope}"
     assert r2.range_size >= 2, (
         f"Expected range sweep to cover both pricing.py + auth.py "
         f"(range_size>=2), got range_size={r2.range_size}"
@@ -216,7 +221,8 @@ async def test_pre_v0411_head_only_would_have_missed_intermediate_drift(
 
     # Drift commit
     _commit_edit(
-        repo_root, "pricing.py",
+        repo_root,
+        "pricing.py",
         "def calculate_discount(t):\n    return t * 999",  # nonsense
         "drift pricing",
     )
@@ -260,7 +266,8 @@ async def test_sync_to_same_sha_fast_paths_with_head_only_scope(_isolated_ledger
 @pytest.mark.phase2
 @pytest.mark.asyncio
 async def test_unreachable_base_sha_falls_back_to_head_only(
-    _isolated_ledger, monkeypatch,
+    _isolated_ledger,
+    monkeypatch,
 ):
     """If ``last_synced_commit`` is unreachable (force-push, shallow
     clone), the range diff returns None and we fall back to head-only.
@@ -274,6 +281,7 @@ async def test_unreachable_base_sha_falls_back_to_head_only(
     # Inject a bogus cursor by patching get_sync_state to return a
     # SHA that doesn't exist in the repo.
     from ledger import adapter as adapter_mod
+
     bogus = "deadbeef" + "0" * 32
 
     real_get_sync_state = adapter_mod.get_sync_state
@@ -297,12 +305,15 @@ async def _bogus_get_sync_state(client, repo_path):
 def test_link_commit_response_contract_has_new_fields():
     """LinkCommitResponse v0.4.11 contract has sweep_scope + range_size."""
     from contracts import LinkCommitResponse
+
     fields = LinkCommitResponse.model_fields
     assert "sweep_scope" in fields
     assert "range_size" in fields
     # Defaults: head_only / 0 — backward compat for callers that don't set them
     inst = LinkCommitResponse(
-        commit_hash="abc", synced=True, reason="new_commit",
+        commit_hash="abc",
+        synced=True,
+        reason="new_commit",
     )
     assert inst.sweep_scope == "head_only"
     assert inst.range_size == 0
@@ -338,7 +349,8 @@ async def test_multi_region_edits_emit_pending_checks_per_region(
     await ledger.connect()
 
     # Append a second function so we have two regions in pricing.py
-    (repo_root / "pricing.py").write_text(dedent("""
+    (repo_root / "pricing.py").write_text(
+        dedent("""
         def calculate_discount(order_total):
             if order_total >= 100:
                 return order_total * 0.10
@@ -347,7 +359,9 @@ def calculate_discount(order_total):
 
         def calculate_tax(order_total):
             return order_total * 0.08
-    """).strip() + "\n")
+    """).strip()
+        + "\n"
+    )
     _git(repo_root, "add", "pricing.py")
     _git(repo_root, "-c", "commit.gpgsign=false", "commit", "-q", "-m", "add tax")
 
@@ -390,14 +404,17 @@ def calculate_tax(order_total):
     await handle_link_commit(ctx, "HEAD")
 
     # Now drift BOTH regions in one commit
-    (repo_root / "pricing.py").write_text(dedent("""
+    (repo_root / "pricing.py").write_text(
+        dedent("""
         def calculate_discount(order_total):
             return order_total * 999  # nonsense
 
 
         def calculate_tax(order_total):
             return order_total * 999  # nonsense
-    """).strip() + "\n")
+    """).strip()
+        + "\n"
+    )
     _git(repo_root, "add", "pricing.py")
     _git(repo_root, "-c", "commit.gpgsign=false", "commit", "-q", "-m", "drift both")
 
@@ -420,15 +437,12 @@ def calculate_tax(order_total):
     # Same intent across both checks (proves the shared-intent case).
     intent_ids = {p.decision_id for p in r2.pending_compliance_checks}
     assert len(intent_ids) == 1, (
-        f"Multi-region test: pending checks should share one decision_id, "
-        f"got {intent_ids}"
+        f"Multi-region test: pending checks should share one decision_id, got {intent_ids}"
     )
 
     # Distinct region_ids — the caller needs independent verdicts per region.
     region_ids = {p.region_id for p in r2.pending_compliance_checks}
-    assert len(region_ids) == 2, (
-        f"Expected 2 distinct region_ids in the batch, got {region_ids}"
-    )
+    assert len(region_ids) == 2, f"Expected 2 distinct region_ids in the batch, got {region_ids}"
 
     # Phase is drift (hash-mismatch triggered re-emission).
     phases = {p.phase for p in r2.pending_compliance_checks}
diff --git a/tests/test_v0412_preflight.py b/tests/test_v0412_preflight.py
index a4f4eabc..a24865dd 100644
--- a/tests/test_v0412_preflight.py
+++ b/tests/test_v0412_preflight.py
@@ -49,7 +49,6 @@
     handle_preflight,
 )
 
-
 # ── Pure helpers ────────────────────────────────────────────────────
 
 
@@ -88,9 +87,7 @@ def test_validate_topic_strips_implementation_verbs():
 
 def test_dedup_key_normalizes_word_order():
     """'Stripe webhook' and 'webhook stripe' should dedup as same topic."""
-    assert _dedup_key_for("Stripe webhook payment") == _dedup_key_for(
-        "payment webhook Stripe"
-    )
+    assert _dedup_key_for("Stripe webhook payment") == _dedup_key_for("payment webhook Stripe")
 
 
 def test_check_dedup_marks_then_hits():
@@ -152,7 +149,9 @@ def _empty_search_response() -> SearchDecisionsResponse:
     return SearchDecisionsResponse(
         query="test",
         sync_status=LinkCommitResponse(
-            commit_hash="abc", synced=True, reason="new_commit",
+            commit_hash="abc",
+            synced=True,
+            reason="new_commit",
         ),
         matches=[],
         ungrounded_count=0,
@@ -164,7 +163,9 @@ def _search_response_with(matches: list[DecisionMatch]) -> SearchDecisionsRespon
     return SearchDecisionsResponse(
         query="test",
         sync_status=LinkCommitResponse(
-            commit_hash="abc", synced=True, reason="new_commit",
+            commit_hash="abc",
+            synced=True,
+            reason="new_commit",
         ),
         matches=matches,
         ungrounded_count=sum(1 for m in matches if m.status == "ungrounded"),
@@ -172,7 +173,9 @@ def _search_response_with(matches: list[DecisionMatch]) -> SearchDecisionsRespon
     )
 
 
-def _match(intent_id: str, status: str = "reflected", file_path: str = "src/foo.ts") -> DecisionMatch:
+def _match(
+    intent_id: str, status: str = "reflected", file_path: str = "src/foo.ts"
+) -> DecisionMatch:
     return DecisionMatch(
         decision_id=intent_id,
         description=f"decision {intent_id}",
@@ -181,13 +184,15 @@ def _match(intent_id: str, status: str = "reflected", file_path: str = "src/foo.
         source_ref="test-ref",
         code_regions=[
             CodeRegionSummary(
-                file_path=file_path, symbol="foo", lines=(1, 10), purpose="",
+                file_path=file_path,
+                symbol="foo",
+                lines=(1, 10),
+                purpose="",
             )
         ],
     )
 
 
-
 @pytest.mark.asyncio
 async def test_topic_too_generic_returns_silent_skip():
     ctx = _ctx()
@@ -240,10 +245,12 @@ async def test_normal_mode_silent_on_plain_matches_only():
     the only matches are reflected with no drift, no divergences, no
     open questions."""
     ctx = _ctx(guided=False)
-    search = _search_response_with([
-        _match("intent:1", status="reflected"),
-        _match("intent:2", status="reflected"),
-    ])
+    search = _search_response_with(
+        [
+            _match("intent:1", status="reflected"),
+            _match("intent:2", status="reflected"),
+        ]
+    )
     with patch(
         "handlers.preflight.handle_search_decisions",
         new=AsyncMock(return_value=search),
@@ -288,11 +295,11 @@ async def test_search_failure_fails_open():
     """Robustness: if search throws, preflight returns fired=false
     silently — never blocks on bicameral being unavailable."""
     ctx = _ctx()
+
     async def _boom(*a, **kw):
         raise RuntimeError("ledger down")
+
     with patch("handlers.preflight.handle_search_decisions", side_effect=_boom):
         r = await handle_preflight(ctx, topic="Stripe webhook payment")
     assert r.fired is False
     assert r.reason == "no_matches"
-
-
diff --git a/tests/test_v0413_canonical_dedup.py b/tests/test_v0413_canonical_dedup.py
index 9abad717..a89cfd08 100644
--- a/tests/test_v0413_canonical_dedup.py
+++ b/tests/test_v0413_canonical_dedup.py
@@ -30,7 +30,6 @@
     canonicalize_text,
 )
 
-
 # ── Source ref canonicalization ─────────────────────────────────────
 
 
@@ -57,7 +56,8 @@ def test_slack_three_variants_collapse():
 
 def test_notion_strips_title_prefix():
     out = canonicalize_source_ref(
-        "notion", "Page-Title-abc123def456abc123def456abc123ef45",
+        "notion",
+        "Page-Title-abc123def456abc123def456abc123ef45",
     )
     # 32-char hex extracted from the end
     assert out.startswith("notion:")
@@ -237,7 +237,8 @@ async def test_upsert_intent_collapses_whitespace_variant(monkeypatch, surreal_u
 
     decisions = await ledger.get_all_decisions(filter="all")
     matching = [
-        d for d in decisions
+        d
+        for d in decisions
         if "redis" in d["description"].lower() and "session" in d["description"].lower()
     ]
     assert len(matching) == 1, (
diff --git a/tests/test_v0414_source_excerpt.py b/tests/test_v0414_source_excerpt.py
index 8bccdb0e..a95436a4 100644
--- a/tests/test_v0414_source_excerpt.py
+++ b/tests/test_v0414_source_excerpt.py
@@ -24,7 +24,6 @@
 
 from adapters.ledger import get_ledger, reset_ledger_singleton
 from context import BicameralContext
-
 from handlers.detect_drift import handle_detect_drift
 from handlers.search_decisions import handle_search_decisions
 
@@ -67,16 +66,17 @@ async def test_search_response_includes_source_excerpt(monkeypatch, surreal_url)
 
     ctx = BicameralContext.from_env()
     response = await handle_search_decisions(
-        ctx, query="token bucket rate limit", max_results=5, min_confidence=0.3,
+        ctx,
+        query="token bucket rate limit",
+        max_results=5,
+        min_confidence=0.3,
     )
     assert response.matches, "Expected at least one match for the ingested decision"
     match = response.matches[0]
     assert "token bucket" in match.source_excerpt.lower(), (
         f"source_excerpt should contain the meeting passage; got {match.source_excerpt!r}"
     )
-    assert "Alex:" in match.source_excerpt, (
-        "speaker prefix should be preserved in the raw passage"
-    )
+    assert "Alex:" in match.source_excerpt, "speaker prefix should be preserved in the raw passage"
     assert match.meeting_date == "2026-03-30", (
         f"meeting_date should round-trip; got {match.meeting_date!r}"
     )
@@ -116,7 +116,10 @@ async def test_empty_source_excerpt_is_graceful(monkeypatch, surreal_url):
 
     ctx = BicameralContext.from_env()
     response = await handle_search_decisions(
-        ctx, query="empty span test", max_results=5, min_confidence=0.3,
+        ctx,
+        query="empty span test",
+        max_results=5,
+        min_confidence=0.3,
     )
     assert response.matches
     assert response.matches[0].source_excerpt == ""
@@ -168,7 +171,9 @@ async def test_drift_entry_carries_source_excerpt(monkeypatch, surreal_url):
 
     ctx = BicameralContext.from_env()
     drift = await handle_detect_drift(
-        ctx, file_path="src/pricing/discount.py", use_working_tree=False,
+        ctx,
+        file_path="src/pricing/discount.py",
+        use_working_tree=False,
     )
     assert drift.decisions, "Expected at least one decision from detect_drift"
     entry = drift.decisions[0]
diff --git a/tests/test_v0416_gap_judge.py b/tests/test_v0416_gap_judge.py
index af835788..95e90955 100644
--- a/tests/test_v0416_gap_judge.py
+++ b/tests/test_v0416_gap_judge.py
@@ -36,7 +36,6 @@
 )
 from handlers.ingest import handle_ingest
 
-
 # ── Layer 1: pure rubric shape tests ────────────────────────────────
 
 
@@ -144,7 +143,10 @@ def test_build_context_decisions_groups_related_by_symbol():
         source_ref="r1",
         code_regions=[
             CodeRegionSummary(
-                file_path="src/limit.py", symbol="Limiter", lines=(1, 10), purpose="",
+                file_path="src/limit.py",
+                symbol="Limiter",
+                lines=(1, 10),
+                purpose="",
             )
         ],
         drift_evidence="",
@@ -160,7 +162,10 @@ def test_build_context_decisions_groups_related_by_symbol():
         source_ref="r2",
         code_regions=[
             CodeRegionSummary(
-                file_path="src/limit.py", symbol="Limiter", lines=(1, 10), purpose="",
+                file_path="src/limit.py",
+                symbol="Limiter",
+                lines=(1, 10),
+                purpose="",
             )
         ],
         drift_evidence="",
@@ -176,7 +181,10 @@ def test_build_context_decisions_groups_related_by_symbol():
         source_ref="r3",
         code_regions=[
             CodeRegionSummary(
-                file_path="src/other.py", symbol="Other", lines=(1, 10), purpose="",
+                file_path="src/other.py",
+                symbol="Other",
+                lines=(1, 10),
+                purpose="",
             )
         ],
         drift_evidence="",
@@ -222,8 +230,12 @@ def _seed_repo(repo_root: Path, body: str) -> None:
     _git(repo_root, "add", ".")
     _git(
         repo_root,
-        "-c", "commit.gpgsign=false",
-        "commit", "-q", "-m", "seed",
+        "-c",
+        "commit.gpgsign=false",
+        "commit",
+        "-q",
+        "-m",
+        "seed",
     )
 
 
@@ -303,7 +315,8 @@ async def test_judge_gaps_honest_empty_path(_isolated_ledger):
     ctx = BicameralContext.from_env()
 
     payload = await handle_judge_gaps(
-        ctx, topic="topic-that-has-no-decisions-anywhere",
+        ctx,
+        topic="topic-that-has-no-decisions-anywhere",
     )
     assert payload is None
 
@@ -333,7 +346,8 @@ async def test_judge_gaps_builds_context_pack(_isolated_ledger):
     # Search BM25 against the decision terms directly — generic topics
     # like "discount pricing" don't rank above min_confidence=0.3.
     judgment = await handle_judge_gaps(
-        ctx, topic="apply 10% discount on orders",
+        ctx,
+        topic="apply 10% discount on orders",
     )
     assert judgment is not None, "judge_gaps must build a pack on matches"
     assert judgment.topic == "apply 10% discount on orders"
@@ -342,9 +356,7 @@ async def test_judge_gaps_builds_context_pack(_isolated_ledger):
     assert "VERBATIM" in judgment.judgment_prompt
     assert judgment.as_of, "as_of must be populated with ISO datetime"
 
-    assert len(judgment.decisions) >= 1, (
-        "judge_gaps should see the just-ingested decision"
-    )
+    assert len(judgment.decisions) >= 1, "judge_gaps should see the just-ingested decision"
     decision = judgment.decisions[0]
     assert "10%" in decision.description or "discount" in decision.description.lower()
     assert "10%" in decision.source_excerpt or "$100" in decision.source_excerpt
diff --git a/tests/test_v0416_natural_format_fields.py b/tests/test_v0416_natural_format_fields.py
index d5c5f674..ed563824 100644
--- a/tests/test_v0416_natural_format_fields.py
+++ b/tests/test_v0416_natural_format_fields.py
@@ -29,9 +29,11 @@
 def test_canonical_description_survives():
     """`decisions[].description` is the canonical field — must produce
     a mapping with the description as the intent."""
-    out = _normalize_payload({
-        "decisions": [{"description": "Use Redis for session cache"}],
-    })
+    out = _normalize_payload(
+        {
+            "decisions": [{"description": "Use Redis for session cache"}],
+        }
+    )
     mappings = out.get("mappings", [])
     assert len(mappings) == 1
     assert mappings[0]["intent"] == "Use Redis for session cache"
@@ -41,9 +43,11 @@ def test_canonical_description_survives():
 def test_canonical_title_fallback():
     """`decisions[].title` is the documented secondary field — used when
     `description` is absent."""
-    out = _normalize_payload({
-        "decisions": [{"title": "Apply 10% discount on orders over $100"}],
-    })
+    out = _normalize_payload(
+        {
+            "decisions": [{"title": "Apply 10% discount on orders over $100"}],
+        }
+    )
     mappings = out.get("mappings", [])
     assert len(mappings) == 1
     assert mappings[0]["intent"] == "Apply 10% discount on orders over $100"
@@ -53,9 +57,11 @@ def test_text_alias_for_decisions():
     """v0.4.16 alias: `text` on a decision should flow through as the
     intent. This is the exact shape the old SKILL.md documented; keeping
     it working guards against a regression."""
-    out = _normalize_payload({
-        "decisions": [{"text": "Cache user sessions in Redis"}],
-    })
+    out = _normalize_payload(
+        {
+            "decisions": [{"text": "Cache user sessions in Redis"}],
+        }
+    )
     mappings = out.get("mappings", [])
     assert len(mappings) == 1
     assert mappings[0]["intent"] == "Cache user sessions in Redis"
@@ -65,12 +71,16 @@ def test_description_preferred_over_text_when_both_present():
     """When a decision has both `description` and `text`, the canonical
     `description` wins. This is the documented priority order:
     description > title > text."""
-    out = _normalize_payload({
-        "decisions": [{
-            "description": "canonical description wins",
-            "text": "alias should lose",
-        }],
-    })
+    out = _normalize_payload(
+        {
+            "decisions": [
+                {
+                    "description": "canonical description wins",
+                    "text": "alias should lose",
+                }
+            ],
+        }
+    )
     mappings = out.get("mappings", [])
     assert len(mappings) == 1
     assert mappings[0]["intent"] == "canonical description wins"
@@ -79,13 +89,15 @@ def test_description_preferred_over_text_when_both_present():
 def test_decision_with_all_text_fields_empty_is_dropped():
     """If a decision has no text in any accepted field, it must be
     silently dropped rather than producing a phantom mapping."""
-    out = _normalize_payload({
-        "decisions": [
-            {"description": "real decision"},
-            {"status": "proposed"},  # no description/title/text
-            {"id": "abc", "participants": ["Ian"]},  # metadata only
-        ],
-    })
+    out = _normalize_payload(
+        {
+            "decisions": [
+                {"description": "real decision"},
+                {"status": "proposed"},  # no description/title/text
+                {"id": "abc", "participants": ["Ian"]},  # metadata only
+            ],
+        }
+    )
     mappings = out.get("mappings", [])
     assert len(mappings) == 1
     assert mappings[0]["intent"] == "real decision"
@@ -95,9 +107,11 @@ def test_action_items_not_written_to_ledger():
     """action_items are accepted in payload for backwards compat but NOT
     written to the ledger (not converted to mappings). They belong in a
     ticket tracker, not the decision ledger."""
-    out = _normalize_payload({
-        "action_items": [{"action": "Write retry tests", "owner": "Ian"}],
-    })
+    out = _normalize_payload(
+        {
+            "action_items": [{"action": "Write retry tests", "owner": "Ian"}],
+        }
+    )
     mappings = out.get("mappings", [])
     assert len(mappings) == 0
 
@@ -105,10 +119,12 @@ def test_action_items_not_written_to_ledger():
 def test_action_items_mixed_with_decisions():
     """When payload has both decisions and action_items, only decisions
     become mappings — action_items are silently ignored."""
-    out = _normalize_payload({
-        "decisions": [{"description": "Use Redis for session cache"}],
-        "action_items": [{"action": "Write retry tests", "owner": "Ian"}],
-    })
+    out = _normalize_payload(
+        {
+            "decisions": [{"description": "Use Redis for session cache"}],
+            "action_items": [{"action": "Write retry tests", "owner": "Ian"}],
+        }
+    )
     mappings = out.get("mappings", [])
     assert len(mappings) == 1
     assert mappings[0]["intent"] == "Use Redis for session cache"
@@ -120,17 +136,19 @@ def test_the_exact_dogfood_payload():
     1 phantom '[Action: Ian] ' mapping, grounded to unrelated symbols.
     After the fix: only real decisions surface; action_items are accepted
     for backwards compat but not written to the ledger."""
-    out = _normalize_payload({
-        "source": "transcript",
-        "title": "demo-gallery",
-        "decisions": [
-            {"text": "Cache user sessions in Redis for horizontal scaling"},
-            {"text": "Apply 10% discount on orders over $100"},
-        ],
-        "action_items": [
-            {"text": "Write tests for retry policy", "owner": "Ian"},
-        ],
-    })
+    out = _normalize_payload(
+        {
+            "source": "transcript",
+            "title": "demo-gallery",
+            "decisions": [
+                {"text": "Cache user sessions in Redis for horizontal scaling"},
+                {"text": "Apply 10% discount on orders over $100"},
+            ],
+            "action_items": [
+                {"text": "Write tests for retry policy", "owner": "Ian"},
+            ],
+        }
+    )
     mappings = out.get("mappings", [])
     intents = [m["intent"] for m in mappings]
     assert "Cache user sessions in Redis for horizontal scaling" in intents
@@ -143,13 +161,15 @@ def test_the_exact_dogfood_payload():
 def test_mixed_canonical_and_alias_in_same_payload():
     """A payload can mix canonical and alias fields across decisions —
     the handler normalizes each decision independently."""
-    out = _normalize_payload({
-        "decisions": [
-            {"description": "First decision via canonical field"},
-            {"title": "Second decision via title fallback"},
-            {"text": "Third decision via text alias"},
-        ],
-    })
+    out = _normalize_payload(
+        {
+            "decisions": [
+                {"description": "First decision via canonical field"},
+                {"title": "Second decision via title fallback"},
+                {"text": "Third decision via text alias"},
+            ],
+        }
+    )
     mappings = out.get("mappings", [])
     assert len(mappings) == 3
     assert mappings[0]["intent"] == "First decision via canonical field"
@@ -160,11 +180,13 @@ def test_mixed_canonical_and_alias_in_same_payload():
 def test_action_items_always_produce_zero_mappings():
     """action_items are never written to the ledger regardless of their fields.
     This guards against the '[Action: <owner>] ' phantom-mapping regression."""
-    out = _normalize_payload({
-        "action_items": [
-            {"action": "real action", "owner": "Ian"},
-            {"action": "another action"},
-        ],
-    })
+    out = _normalize_payload(
+        {
+            "action_items": [
+                {"action": "real action", "owner": "Ian"},
+                {"action": "another action"},
+            ],
+        }
+    )
     mappings = out.get("mappings", [])
     assert len(mappings) == 0
diff --git a/tests/test_v0417_jargon_hygiene.py b/tests/test_v0417_jargon_hygiene.py
index 231135ef..87eb169e 100644
--- a/tests/test_v0417_jargon_hygiene.py
+++ b/tests/test_v0417_jargon_hygiene.py
@@ -58,10 +58,12 @@
 
 
 def _all_skill_files() -> list[Path]:
-    return sorted([
-        *_MCP_ROOT.glob("skills/**/SKILL.md"),
-        *_MCP_ROOT.glob(".claude/skills/**/SKILL.md"),
-    ])
+    return sorted(
+        [
+            *_MCP_ROOT.glob("skills/**/SKILL.md"),
+            *_MCP_ROOT.glob(".claude/skills/**/SKILL.md"),
+        ]
+    )
 
 
 def _compile_patterns() -> list[tuple[str, re.Pattern]]:
@@ -97,10 +99,7 @@ def test_no_backend_jargon_in_skill_files():
             for match in pattern.finditer(body):
                 # Find the line number for a useful error message
                 line_no = body.count("\n", 0, match.start()) + 1
-                offenders.append(
-                    f"{rel}:{line_no}: "
-                    f"'{match.group()}' (term: '{term}')"
-                )
+                offenders.append(f"{rel}:{line_no}: '{match.group()}' (term: '{term}')")
     assert not offenders, (
         "Backend jargon found in user-facing skill files:\n"
         + "\n".join(f"  - {o}" for o in offenders)
@@ -129,9 +128,8 @@ def test_no_backend_jargon_in_tool_descriptions():
             continue
         # Match Tool(...) — plain Name or attribute reference
         func = node.func
-        is_tool = (
-            (isinstance(func, ast.Name) and func.id == "Tool")
-            or (isinstance(func, ast.Attribute) and func.attr == "Tool")
+        is_tool = (isinstance(func, ast.Name) and func.id == "Tool") or (
+            isinstance(func, ast.Attribute) and func.attr == "Tool"
         )
         if not is_tool:
             continue
@@ -152,13 +150,10 @@ def test_no_backend_jargon_in_tool_descriptions():
 
         for term, pattern in patterns:
             for match in pattern.finditer(desc_text):
-                offenders.append(
-                    f"Tool '{tool_name}': '{match.group()}' (term: '{term}')"
-                )
+                offenders.append(f"Tool '{tool_name}': '{match.group()}' (term: '{term}')")
 
-    assert not offenders, (
-        "Backend jargon found in Tool descriptions:\n"
-        + "\n".join(f"  - {o}" for o in offenders)
+    assert not offenders, "Backend jargon found in Tool descriptions:\n" + "\n".join(
+        f"  - {o}" for o in offenders
     )
 
 
diff --git a/tests/test_v0420_history.py b/tests/test_v0420_history.py
index b64403cb..d751dd6a 100644
--- a/tests/test_v0420_history.py
+++ b/tests/test_v0420_history.py
@@ -24,7 +24,6 @@
 from context import BicameralContext
 from handlers.history import handle_history
 
-
 # ── Fixtures ─────────────────────────────────────────────────────────────────
 
 
@@ -105,20 +104,27 @@ async def test_empty_ledger(ctx):
 async def test_single_source_reflected(ctx):
     """One decision with a code region → one feature, one decision, status reflected or ungrounded."""
     ledger = get_ledger()
-    await _ingest(ledger, _payload([
-        _mapping(
-            description="Use tree-sitter for symbol extraction",
-            source_type="transcript",
-            source_ref="sprint-1",
-            code_regions=[{
-                "file_path": "server.py",
-                "symbol": "validate_symbols",
-                "type": "function",
-                "start_line": 10,
-                "end_line": 30,
-            }],
-        )
-    ]))
+    await _ingest(
+        ledger,
+        _payload(
+            [
+                _mapping(
+                    description="Use tree-sitter for symbol extraction",
+                    source_type="transcript",
+                    source_ref="sprint-1",
+                    code_regions=[
+                        {
+                            "file_path": "server.py",
+                            "symbol": "validate_symbols",
+                            "type": "function",
+                            "start_line": 10,
+                            "end_line": 30,
+                        }
+                    ],
+                )
+            ]
+        ),
+    )
 
     response = await handle_history(ctx)
 
@@ -157,10 +163,7 @@ async def test_multi_source_same_decision(ctx):
     response = await handle_history(ctx)
 
     # Count matching decisions across all features
-    matching = [
-        d for f in response.features for d in f.decisions
-        if "Cache sessions" in d.summary
-    ]
+    matching = [d for f in response.features for d in f.decisions if "Cache sessions" in d.summary]
     # With dedup, should be exactly 1
     assert len(matching) == 1, (
         f"Expected 1 deduped decision, got {len(matching)}: {[d.summary for d in matching]}"
@@ -172,14 +175,19 @@ async def test_multi_source_same_decision(ctx):
 async def test_ungrounded_no_fulfillment(ctx):
     """Decision with no code regions → fulfillment is None, status ungrounded or discovered."""
     ledger = get_ledger()
-    await _ingest(ledger, _payload([
-        _mapping(
-            description="Implement SOC2 audit logging",
-            source_type="document",
-            source_ref="compliance-doc",
-            code_regions=[],  # no grounding
-        )
-    ]))
+    await _ingest(
+        ledger,
+        _payload(
+            [
+                _mapping(
+                    description="Implement SOC2 audit logging",
+                    source_type="document",
+                    source_ref="compliance-doc",
+                    code_regions=[],  # no grounding
+                )
+            ]
+        ),
+    )
 
     response = await handle_history(ctx)
 
@@ -196,13 +204,18 @@ async def test_ungrounded_no_fulfillment(ctx):
 async def test_agent_session_source_type(ctx):
     """source_type='agent_session' round-trips through history correctly."""
     ledger = get_ledger()
-    await _ingest(ledger, _payload([
-        _mapping(
-            description="Use event.id for deduplication, not account_id",
-            source_type="agent_session",
-            source_ref="preflight-resolution-stripe-webhook",
-        )
-    ]))
+    await _ingest(
+        ledger,
+        _payload(
+            [
+                _mapping(
+                    description="Use event.id for deduplication, not account_id",
+                    source_type="agent_session",
+                    source_ref="preflight-resolution-stripe-webhook",
+                )
+            ]
+        ),
+    )
 
     response = await handle_history(ctx)
 
@@ -226,28 +239,43 @@ async def test_feature_group_grouping(ctx):
     ledger = get_ledger()
 
     # Two separate ingests, same feature_group
-    await _ingest(ledger, _payload([
-        _mapping(
-            description="Stripe webhook uses SETNX for idempotency",
-            source_ref="sprint-5",
-            feature_group="Stripe Webhooks",
-        )
-    ]))
-    await _ingest(ledger, _payload([
-        _mapping(
-            description="Stripe webhook retries use exponential backoff",
-            source_ref="sprint-5",
-            feature_group="Stripe Webhooks",
-        )
-    ]))
+    await _ingest(
+        ledger,
+        _payload(
+            [
+                _mapping(
+                    description="Stripe webhook uses SETNX for idempotency",
+                    source_ref="sprint-5",
+                    feature_group="Stripe Webhooks",
+                )
+            ]
+        ),
+    )
+    await _ingest(
+        ledger,
+        _payload(
+            [
+                _mapping(
+                    description="Stripe webhook retries use exponential backoff",
+                    source_ref="sprint-5",
+                    feature_group="Stripe Webhooks",
+                )
+            ]
+        ),
+    )
     # Different feature group
-    await _ingest(ledger, _payload([
-        _mapping(
-            description="Google Calendar syncs via OAuth2",
-            source_ref="sprint-6",
-            feature_group="Google Calendar",
-        )
-    ]))
+    await _ingest(
+        ledger,
+        _payload(
+            [
+                _mapping(
+                    description="Google Calendar syncs via OAuth2",
+                    source_ref="sprint-6",
+                    feature_group="Google Calendar",
+                )
+            ]
+        ),
+    )
 
     response = await handle_history(ctx)
 
@@ -280,25 +308,27 @@ async def test_feature_group_fallback_to_query(ctx):
     ledger = get_ledger()
 
     # Ingest without feature_group (pre-v0.5.1 style)
-    await ledger.ingest_payload({
-        "repo": "test-repo",
-        "query": "auth middleware",
-        "mappings": [
-            {
-                "intent": "JWT tokens expire after 24 hours",
-                "span": {
-                    "text": "JWT tokens expire after 24 hours",
-                    "source_type": "transcript",
-                    "source_ref": "auth-sync-2026-04",
-                    "speakers": [],
-                    "meeting_date": "2026-04-01",
-                },
-                "symbols": [],
-                "code_regions": [],
-                # no feature_group
-            }
-        ],
-    })
+    await ledger.ingest_payload(
+        {
+            "repo": "test-repo",
+            "query": "auth middleware",
+            "mappings": [
+                {
+                    "intent": "JWT tokens expire after 24 hours",
+                    "span": {
+                        "text": "JWT tokens expire after 24 hours",
+                        "source_type": "transcript",
+                        "source_ref": "auth-sync-2026-04",
+                        "speakers": [],
+                        "meeting_date": "2026-04-01",
+                    },
+                    "symbols": [],
+                    "code_regions": [],
+                    # no feature_group
+                }
+            ],
+        }
+    )
 
     response = await handle_history(ctx)
 
@@ -323,13 +353,18 @@ async def test_truncation_at_50_features(ctx):
 
     # Create 51 decisions with distinct feature_groups
     for i in range(51):
-        await _ingest(ledger, _payload([
-            _mapping(
-                description=f"Decision for feature area {i}",
-                source_ref=f"ref-{i}",
-                feature_group=f"Feature Area {i:03d}",
-            )
-        ]))
+        await _ingest(
+            ledger,
+            _payload(
+                [
+                    _mapping(
+                        description=f"Decision for feature area {i}",
+                        source_ref=f"ref-{i}",
+                        feature_group=f"Feature Area {i:03d}",
+                    )
+                ]
+            ),
+        )
 
     response = await handle_history(ctx)
 
@@ -347,20 +382,30 @@ async def test_feature_filter(ctx):
     ledger = get_ledger()
 
     # Create two distinct feature groups
-    await _ingest(ledger, _payload([
-        _mapping(
-            description="Checkout uses Stripe payment intents",
-            source_ref="ref-checkout",
-            feature_group="Checkout Flow",
-        )
-    ]))
-    await _ingest(ledger, _payload([
-        _mapping(
-            description="Auth uses JWT with 24h expiry",
-            source_ref="ref-auth",
-            feature_group="Auth Middleware",
-        )
-    ]))
+    await _ingest(
+        ledger,
+        _payload(
+            [
+                _mapping(
+                    description="Checkout uses Stripe payment intents",
+                    source_ref="ref-checkout",
+                    feature_group="Checkout Flow",
+                )
+            ]
+        ),
+    )
+    await _ingest(
+        ledger,
+        _payload(
+            [
+                _mapping(
+                    description="Auth uses JWT with 24h expiry",
+                    source_ref="ref-auth",
+                    feature_group="Auth Middleware",
+                )
+            ]
+        ),
+    )
 
     response = await handle_history(ctx, feature_filter="checkout")
 
@@ -380,13 +425,18 @@ async def test_feature_filter(ctx):
 async def test_include_superseded_false(ctx):
     """include_superseded=False excludes superseded decisions from response."""
     ledger = get_ledger()
-    await _ingest(ledger, _payload([
-        _mapping(
-            description="Use Redis for session caching",
-            source_ref="sprint-1",
-            feature_group="Session Management",
-        )
-    ]))
+    await _ingest(
+        ledger,
+        _payload(
+            [
+                _mapping(
+                    description="Use Redis for session caching",
+                    source_ref="sprint-1",
+                    feature_group="Session Management",
+                )
+            ]
+        ),
+    )
 
     # All decisions will be ungrounded (not superseded) in this test,
     # so we just verify the parameter is accepted and response is valid.
@@ -403,13 +453,18 @@ async def test_include_superseded_false(ctx):
 async def test_response_structure(ctx):
     """HistoryResponse has the correct structure and types."""
     ledger = get_ledger()
-    await _ingest(ledger, _payload([
-        _mapping(
-            description="Rate limit API calls to 1000 req/min per tenant",
-            source_ref="sprint-3",
-            feature_group="Rate Limiting",
-        )
-    ]))
+    await _ingest(
+        ledger,
+        _payload(
+            [
+                _mapping(
+                    description="Rate limit API calls to 1000 req/min per tenant",
+                    source_ref="sprint-3",
+                    feature_group="Rate Limiting",
+                )
+            ]
+        ),
+    )
 
     response = await handle_history(ctx)
 
diff --git a/tests/test_v048_sync_dedup.py b/tests/test_v048_sync_dedup.py
index 94fa358b..46d45bb4 100644
--- a/tests/test_v048_sync_dedup.py
+++ b/tests/test_v048_sync_dedup.py
@@ -55,8 +55,12 @@ def _seed_repo(repo_root: Path, body: str) -> None:
     _git(repo_root, "add", ".")
     _git(
         repo_root,
-        "-c", "commit.gpgsign=false",
-        "commit", "-q", "-m", "seed",
+        "-c",
+        "commit.gpgsign=false",
+        "commit",
+        "-q",
+        "-m",
+        "seed",
     )
 
 
@@ -65,8 +69,12 @@ def _commit_edit(repo_root: Path, new_body: str, message: str) -> None:
     _git(repo_root, "add", "pricing.py")
     _git(
         repo_root,
-        "-c", "commit.gpgsign=false",
-        "commit", "-q", "-m", message,
+        "-c",
+        "commit.gpgsign=false",
+        "commit",
+        "-q",
+        "-m",
+        message,
     )
 
 
@@ -114,8 +122,7 @@ async def test_dedup_second_call_normalizes_reason(_isolated_ledger):
     r2 = await handle_link_commit(ctx, "HEAD")
 
     assert r2.reason == "already_synced", (
-        f"Dedup hit must normalize reason to 'already_synced', "
-        f"got {r2.reason!r}"
+        f"Dedup hit must normalize reason to 'already_synced', got {r2.reason!r}"
     )
     # Cached fields should match the first call's real values (B23).
     assert r2.commit_hash == r1.commit_hash
@@ -149,9 +156,7 @@ async def _counting_ingest_commit(*args, **kwargs):
 
     ctx = _ctx()
     await handle_link_commit(ctx, "HEAD")
-    assert call_count["n"] == 1, (
-        f"First call should hit the ledger once, got {call_count['n']}"
-    )
+    assert call_count["n"] == 1, f"First call should hit the ledger once, got {call_count['n']}"
 
     # Second call WITHOUT invalidate — dedup short-circuits, no ledger hit.
     await handle_link_commit(ctx, "HEAD")
@@ -202,8 +207,7 @@ def calculate_discount(order_total):
         f"trusting it instead of re-reading git HEAD."
     )
     assert r2.commit_hash != r1.commit_hash, (
-        f"New HEAD SHA should differ from old. r1={r1.commit_hash!r}, "
-        f"r2={r2.commit_hash!r}"
+        f"New HEAD SHA should differ from old. r1={r1.commit_hash!r}, r2={r2.commit_hash!r}"
     )
 
 
@@ -224,7 +228,6 @@ async def test_explicit_sha_dedup(_isolated_ledger):
     r2 = await handle_link_commit(ctx, head_sha)
 
     assert r2.reason == "already_synced", (
-        f"Second call with same explicit SHA should dedup — "
-        f"got reason={r2.reason!r}"
+        f"Second call with same explicit SHA should dedup — got reason={r2.reason!r}"
     )
     assert r2.commit_hash == r1.commit_hash
diff --git a/tests/test_v055_region_anchored_preflight.py b/tests/test_v055_region_anchored_preflight.py
index 73b2c01e..94886243 100644
--- a/tests/test_v055_region_anchored_preflight.py
+++ b/tests/test_v055_region_anchored_preflight.py
@@ -30,7 +30,6 @@
     handle_preflight,
 )
 
-
 # ── Fixtures ────────────────────────────────────────────────────────────────
 
 
@@ -82,12 +81,14 @@ def _make_ctx(
     queried.
     """
     ledger = MagicMock()
-    ledger.ingest_commit = AsyncMock(return_value={
-        "commit_hash": "abc123",
-        "new_decisions_linked": 0,
-        "drift_detected": [],
-        "symbols_indexed": 0,
-    })
+    ledger.ingest_commit = AsyncMock(
+        return_value={
+            "commit_hash": "abc123",
+            "new_decisions_linked": 0,
+            "drift_detected": [],
+            "symbols_indexed": 0,
+        }
+    )
     ledger.get_decisions_for_files = AsyncMock(return_value=region_decisions or [])
     ledger.search_by_query = AsyncMock(return_value=[])
 
@@ -221,9 +222,17 @@ async def test_preflight_fires_on_region_hit_no_keyword():
     )
 
     with (
-        patch("handlers.link_commit.handle_link_commit", new=AsyncMock(return_value=_make_link_commit_response())),
-        patch("handlers.search_decisions.handle_link_commit", new=AsyncMock(return_value=_make_link_commit_response())),
-        patch("handlers.preflight.handle_search_decisions", new=AsyncMock(return_value=search_resp)),
+        patch(
+            "handlers.link_commit.handle_link_commit",
+            new=AsyncMock(return_value=_make_link_commit_response()),
+        ),
+        patch(
+            "handlers.search_decisions.handle_link_commit",
+            new=AsyncMock(return_value=_make_link_commit_response()),
+        ),
+        patch(
+            "handlers.preflight.handle_search_decisions", new=AsyncMock(return_value=search_resp)
+        ),
     ):
         resp = await handle_preflight(
             ctx,
@@ -248,9 +257,17 @@ async def test_preflight_region_in_sources_chained():
     )
 
     with (
-        patch("handlers.link_commit.handle_link_commit", new=AsyncMock(return_value=_make_link_commit_response())),
-        patch("handlers.search_decisions.handle_link_commit", new=AsyncMock(return_value=_make_link_commit_response())),
-        patch("handlers.preflight.handle_search_decisions", new=AsyncMock(return_value=search_resp)),
+        patch(
+            "handlers.link_commit.handle_link_commit",
+            new=AsyncMock(return_value=_make_link_commit_response()),
+        ),
+        patch(
+            "handlers.search_decisions.handle_link_commit",
+            new=AsyncMock(return_value=_make_link_commit_response()),
+        ),
+        patch(
+            "handlers.preflight.handle_search_decisions", new=AsyncMock(return_value=search_resp)
+        ),
     ):
         resp = await handle_preflight(
             ctx,
@@ -287,9 +304,17 @@ async def test_preflight_topic_only_no_file_paths_still_works():
     )
 
     with (
-        patch("handlers.link_commit.handle_link_commit", new=AsyncMock(return_value=_make_link_commit_response())),
-        patch("handlers.search_decisions.handle_link_commit", new=AsyncMock(return_value=_make_link_commit_response())),
-        patch("handlers.preflight.handle_search_decisions", new=AsyncMock(return_value=search_resp)),
+        patch(
+            "handlers.link_commit.handle_link_commit",
+            new=AsyncMock(return_value=_make_link_commit_response()),
+        ),
+        patch(
+            "handlers.search_decisions.handle_link_commit",
+            new=AsyncMock(return_value=_make_link_commit_response()),
+        ),
+        patch(
+            "handlers.preflight.handle_search_decisions", new=AsyncMock(return_value=search_resp)
+        ),
     ):
         resp = await handle_preflight(ctx, topic="drifted stripe webhook handler")