From d9262255f64d76aed8900006578c8f8ab2295cd4 Mon Sep 17 00:00:00 2001 From: WulfForge Date: Mon, 1 Jun 2026 23:59:15 -0400 Subject: [PATCH] =?UTF-8?q?fix(hooks):=20#170=20=E2=80=94=20gate=20the=20p?= =?UTF-8?q?ost-preflight=20capture=20reminder=20to=20non-read-only=20promp?= =?UTF-8?q?ts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PostToolUse capture reminder fired on every preflight that surfaced >=1 decision, asking the user to disambiguate a possible refinement even on read-only and tangential prompts — spam, with a risk of spurious/hallucinated captures. Gate: suppress the reminder ONLY for unambiguously read-only / informational prompts that carry no implementation verb (`suppress_capture_reminder` in scripts/hooks/preflight_intent.py). The UserPromptSubmit hook hands the prompt to the PostToolUse hook via a session-scoped store (scripts/hooks/session_prompt_store.py; 24h staleness sweep + SessionEnd cleanup). DESIGN DECISION (operator-ruled after an audit VETO) — "read-only-only" suppression: A lexical gate cannot distinguish a truly-compatible prompt ("add tests for X") from a refinement smuggled under a compatible verb ("add tests for X, and expose it as a programmatic API"). Suppressing compatible-WRITE prompts would regress #175 (silent, irreversible decision-capture loss). So the gate fails open on ANY implementation verb — only verb-free read-only prompts are ever suppressed. CONSEQUENCE: acceptance case (b) ("add tests for drag-to-reorder" -> suppress) is intentionally NOT implemented; it fires, preserving #175. The issue's acceptance should be amended to "suppress read-only / informational prompts only." Governed via /qor-auto-dev-1: plan VETOed (the lexical-gate hole above), re-planned to the operator's read-only-only ruling, re-audited PASS. Tests lock the invariant (write-verb prompts — incl. the smuggled-refinement case — must fire). META_LEDGER seal deferred to merge time: this branch and PR #534 both branch off the same dev tip (Entry #55); sealing here would fork the Merkle chain against #534's pending Entry #56. The entry is added (next free number, re-chained off the dev tip) when this branch merges. 🤖 Authored via [Qor-logic SDLC](https://github.com/MythologIQ-Labs-LLC/qor-logic) on [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 (1M context) --- .../hooks/post_preflight_capture_reminder.py | 11 ++ scripts/hooks/preflight_intent.py | 53 +++++++ scripts/hooks/preflight_reminder.py | 6 + scripts/hooks/session_end_queue_writer.py | 10 +- scripts/hooks/session_prompt_store.py | 92 ++++++++++++ skills/bicameral-preflight/SKILL.md | 17 ++- tests/test_post_preflight_capture_reminder.py | 134 ++++++++++++++++++ tests/test_preflight_intent.py | 57 ++++++++ 8 files changed, 378 insertions(+), 2 deletions(-) create mode 100644 scripts/hooks/session_prompt_store.py create mode 100644 tests/test_post_preflight_capture_reminder.py diff --git a/scripts/hooks/post_preflight_capture_reminder.py b/scripts/hooks/post_preflight_capture_reminder.py index c2669274..71112390 100644 --- a/scripts/hooks/post_preflight_capture_reminder.py +++ b/scripts/hooks/post_preflight_capture_reminder.py @@ -45,6 +45,11 @@ import json import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +from hooks.preflight_intent import suppress_capture_reminder # noqa: E402 +from hooks.session_prompt_store import read_session_prompt # noqa: E402 PREFLIGHT_TOOL_NAME = "mcp__bicameral__bicameral_preflight" @@ -133,6 +138,12 @@ def main() -> int: decisions = response.get("decisions") or [] if not isinstance(decisions, list) or not decisions: return 0 + # #170 — suppress on read-only/informational prompts. Recall-biased: a + # missing/unreadable prompt or any implementation-verb prompt fires, so a + # refinement (smuggled or not) is never silently dropped (#175 invariant). + prompt = read_session_prompt(str(payload.get("session_id") or "")) or "" + if suppress_capture_reminder(prompt): + return 0 json.dump( { "hookSpecificOutput": { diff --git a/scripts/hooks/preflight_intent.py b/scripts/hooks/preflight_intent.py index b476ddcd..ede15b39 100644 --- a/scripts/hooks/preflight_intent.py +++ b/scripts/hooks/preflight_intent.py @@ -87,3 +87,56 @@ def should_fire_preflight(prompt: str) -> bool: return True lowered = prompt.lower() return any(phrase in lowered for phrase in INDIRECT_INTENT_PHRASES) + + +# #170 — read-only / informational intent. Used by the post-preflight capture +# reminder gate to suppress the Step 5.6 nudge on prompts that cannot be +# implementing a refinement. Deliberately NARROW and applied only AFTER the +# implementation-verb check (see suppress_capture_reminder): any prompt with a +# write verb is never suppressed, so a refinement smuggled under a compatible +# verb ("add tests ... and expose X as a programmatic API") always fires the +# reminder. This preserves the #175 invariant that contradiction judgment is +# never decided by a lexical scan over a write-intent prompt. +READ_ONLY_PATTERNS: tuple[re.Pattern[str], ...] = ( + re.compile(r"\bexplain\b", re.IGNORECASE), + re.compile(r"\bhow (?:does|do|did|can|would|is|are)\b", re.IGNORECASE), + re.compile(r"\bwhat(?:'s| is| are| does| do)\b", re.IGNORECASE), + re.compile(r"\bwhy (?:does|do|did|is|are)\b", re.IGNORECASE), + re.compile(r"\bdescribe\b", re.IGNORECASE), + re.compile(r"\bsummar(?:ize|ise|y)\b", re.IGNORECASE), + re.compile(r"\breview\b", re.IGNORECASE), + re.compile(r"\bshow me\b", re.IGNORECASE), + re.compile(r"\bwalk me through\b", re.IGNORECASE), + re.compile(r"\btell me about\b", re.IGNORECASE), + re.compile(r"\bwhere (?:is|are|does|do)\b", re.IGNORECASE), + re.compile(r"\bunderstand\b", re.IGNORECASE), + re.compile(r"\blook at\b", re.IGNORECASE), +) + + +def suppress_capture_reminder(prompt: str) -> bool: + """Return True iff the post-preflight capture reminder should be SUPPRESSED. + + #170: the reminder (Step 5.6 of bicameral-preflight) is otherwise injected on + every preflight call that surfaces >=1 decision, asking the user to + disambiguate a possible refinement. That is spam on read-only / informational + prompts where no refinement exists. + + Recall-biased gate (operator-selected "read-only-only" suppression, preserving + the #175 no-data-loss invariant): + 1. empty / whitespace prompt -> False (fire; uncertain) + 2. ANY implementation verb present -> False (fire; could be implementing a + refinement, smuggled or not — never suppress a write-intent prompt) + 3. otherwise, suppress iff a read-only / informational signal is present. + + Because step 2 runs before step 3, the read-only patterns only ever evaluate + verb-free prompts — so their loose matching can only suppress prompts that + cannot be performing an edit this turn. NOTE: this intentionally does NOT + reuse should_fire_preflight, whose SKIP_PATTERNS (e.g. ``add ... test``) would + misclassify write prompts as non-firing and reopen the #175 hole. + """ + if not prompt or not prompt.strip(): + return False + if _VERB_REGEX.search(prompt): + return False + return any(pat.search(prompt) for pat in READ_ONLY_PATTERNS) diff --git a/scripts/hooks/preflight_reminder.py b/scripts/hooks/preflight_reminder.py index 7ea081b2..b1048349 100644 --- a/scripts/hooks/preflight_reminder.py +++ b/scripts/hooks/preflight_reminder.py @@ -40,6 +40,7 @@ sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) from hooks.preflight_intent import should_fire_preflight # noqa: E402 +from hooks.session_prompt_store import write_session_prompt # noqa: E402 REMINDER_TEXT = ( "\n" @@ -76,6 +77,11 @@ def main() -> int: except (json.JSONDecodeError, ValueError): return 0 prompt = payload.get("prompt", "") if isinstance(payload, dict) else "" + # #170 — hand the prompt off to the PostToolUse capture-reminder gate, which + # cannot see the prompt from its own payload. Write regardless of fire intent + # (a read-only prompt the agent still preflights on must be classifiable). + session_id = payload.get("session_id", "") if isinstance(payload, dict) else "" + write_session_prompt(str(session_id or ""), prompt) if should_fire_preflight(prompt): json.dump( { diff --git a/scripts/hooks/session_end_queue_writer.py b/scripts/hooks/session_end_queue_writer.py index ae7d6ad7..7ea16abc 100644 --- a/scripts/hooks/session_end_queue_writer.py +++ b/scripts/hooks/session_end_queue_writer.py @@ -24,6 +24,9 @@ from pathlib import Path sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +from hooks.session_prompt_store import cleanup_session_prompt # noqa: E402 + from events.transcript_queue import write_pending # noqa: E402 @@ -34,7 +37,12 @@ def main() -> int: return 0 if not isinstance(payload, dict): return 0 - session_id = str(payload.get("session_id") or uuid.uuid4()) + session_id_raw = str(payload.get("session_id") or "") + # #170 — best-effort cleanup of the prompt handoff file, before the + # transcript early-return so it runs even when no transcript is queued. + if session_id_raw: + cleanup_session_prompt(session_id_raw) + session_id = session_id_raw or str(uuid.uuid4()) transcript_path = str(payload.get("transcript_path", "")).strip() cwd = str(payload.get("cwd", "")).strip() if not transcript_path or not cwd: diff --git a/scripts/hooks/session_prompt_store.py b/scripts/hooks/session_prompt_store.py new file mode 100644 index 00000000..b30babdb --- /dev/null +++ b/scripts/hooks/session_prompt_store.py @@ -0,0 +1,92 @@ +"""Session-scoped prompt handoff for the preflight hook pair (#170). + +The PostToolUse capture-reminder gate needs the user's prompt to classify it, +but PostToolUse payloads carry only ``tool_name``/``tool_input``/``tool_response`` +— not the prompt. The UserPromptSubmit hook (which DOES receive the prompt) +writes it here keyed by Claude Code ``session_id``; the PostToolUse hook reads it +back; the SessionEnd hook deletes it. + +All operations are best-effort and never raise — a broken handoff degrades to +"fire the reminder" (recall-biased), never to a crash that blocks the user. + +Storage: ``/bicameral-prompts/.txt``. To bound orphans from +sessions that crash without firing SessionEnd, ``write_session_prompt`` also +sweeps files older than ``_STALE_SECONDS`` on each write. +""" + +from __future__ import annotations + +import re +import tempfile +import time +from pathlib import Path + +_DIR_NAME = "bicameral-prompts" +_STALE_SECONDS = 24 * 60 * 60 # 24h — orphan-file staleness bound +_SAFE_SESSION = re.compile(r"[^A-Za-z0-9_.-]") + + +def _store_dir() -> Path: + return Path(tempfile.gettempdir()) / _DIR_NAME + + +def _path_for(session_id: str) -> Path | None: + sid = _SAFE_SESSION.sub("_", session_id.strip()) + if not sid: + return None + return _store_dir() / f"{sid}.txt" + + +def _sweep_stale(now: float | None = None) -> None: + """Delete prompt files older than the staleness bound. Best-effort.""" + now = time.time() if now is None else now + try: + for f in _store_dir().glob("*.txt"): + try: + if now - f.stat().st_mtime > _STALE_SECONDS: + f.unlink() + except OSError: + continue + except OSError: + return + + +def write_session_prompt(session_id: str, prompt: str) -> None: + """Persist ``prompt`` for ``session_id`` and sweep stale orphans.""" + if not session_id or not prompt: + return + path = _path_for(session_id) + if path is None: + return + try: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(prompt, encoding="utf-8") + except OSError: + return + _sweep_stale() + + +def read_session_prompt(session_id: str) -> str | None: + """Return the stored prompt for ``session_id``, or None if unavailable.""" + if not session_id: + return None + path = _path_for(session_id) + if path is None: + return None + try: + return path.read_text(encoding="utf-8") + except OSError: + return None + + +def cleanup_session_prompt(session_id: str) -> None: + """Delete the stored prompt for ``session_id`` (graceful SessionEnd path).""" + if not session_id: + return + path = _path_for(session_id) + if path is None: + return + try: + path.unlink() + except OSError: + return diff --git a/skills/bicameral-preflight/SKILL.md b/skills/bicameral-preflight/SKILL.md index ca399d70..4b37fcdb 100644 --- a/skills/bicameral-preflight/SKILL.md +++ b/skills/bicameral-preflight/SKILL.md @@ -515,7 +515,7 @@ Narrate one line: *"Captured refinement: '' — wired as #### Hook reinforcement A PostToolUse hook scoped to `mcp__bicameral__bicameral_preflight` injects a -`` after every preflight call that surfaces ≥1 decision. The +`` after a preflight call that surfaces ≥1 decision. The reminder templates Step 5.6.1's `AskUserQuestion` shape with the surfaced `decision_id` + description filled in, so the question fires reliably even when the agent's natural inclination would be to skip the disambiguation. @@ -523,6 +523,21 @@ Source: `scripts/hooks/post_preflight_capture_reminder.py`; wired by `setup_wizard._install_claude_hooks` and the e2e harness's `materialize_settings_with_hooks`. +**Read-only suppression gate (#170).** The reminder is suppressed for +unambiguously read-only / informational prompts (e.g. "explain how the reorder +flow works", "review the session module") where no refinement can exist. The +gate is **recall-biased and deliberately narrow**: it suppresses ONLY when the +prompt carries a read-only signal AND **no** implementation verb +(`suppress_capture_reminder` in `scripts/hooks/preflight_intent.py`). Any +write-intent prompt — including a refinement smuggled under a compatible verb +("add tests … and expose X as a programmatic API") — always fires, preserving +the #175 invariant that contradiction is never decided by a lexical scan over a +write prompt. The prompt is handed from the UserPromptSubmit hook to this hook +via `scripts/hooks/session_prompt_store.py` (session-scoped, swept after 24h and +deleted on SessionEnd). Consequence: a compatible-write prompt like "add tests +for drag-to-reorder" still fires the disambiguation (it cannot be lexically told +apart from a smuggled refinement) — accepted by design over the data-loss risk. + ### 6. Honor blocking hints (guided mode vs normal mode) The agent's `guided_mode` setting controls whether action hints are diff --git a/tests/test_post_preflight_capture_reminder.py b/tests/test_post_preflight_capture_reminder.py new file mode 100644 index 00000000..df4df972 --- /dev/null +++ b/tests/test_post_preflight_capture_reminder.py @@ -0,0 +1,134 @@ +"""Integration tests for the #170 capture-reminder suppression gate. + +Drives the real PostToolUse hook ``main()`` with JSON payloads on stdin and a +real (tempdir-redirected) session prompt file written by the real +``session_prompt_store`` — no mocks of the classifier or the store. Covers the +three #170 acceptance fixtures, the recall-bias fallback (missing prompt → fire), +and the cleanup/staleness story. +""" + +from __future__ import annotations + +import io +import json +import sys +import tempfile +import time +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO_ROOT)) +sys.path.insert(0, str(REPO_ROOT / "scripts")) + +from scripts.hooks import post_preflight_capture_reminder as hook # noqa: E402 +from scripts.hooks import session_end_queue_writer as session_end # noqa: E402 +from scripts.hooks import session_prompt_store as store # noqa: E402 + +_SID = "sess-170-test" +_DECISIONS = [{"decision_id": "decision:abc", "description": "Drag-to-reorder commits"}] + + +def _run_hook(payload: dict) -> str: + """Invoke the hook main() with payload on stdin; return captured stdout.""" + old_in, old_out = sys.stdin, sys.stdout + sys.stdin = io.StringIO(json.dumps(payload)) + sys.stdout = io.StringIO() + try: + hook.main() + return sys.stdout.getvalue() + finally: + sys.stdin, sys.stdout = old_in, old_out + + +def _payload(session_id: str = _SID) -> dict: + return { + "tool_name": "mcp__bicameral__bicameral_preflight", + "tool_response": {"fired": True, "decisions": _DECISIONS}, + "session_id": session_id, + } + + +def _fired(stdout: str) -> bool: + return "system-reminder" in stdout and "AskUserQuestion" in stdout + + +def _redirect_store(monkeypatch, tmp_path) -> None: + monkeypatch.setattr(tempfile, "gettempdir", lambda: str(tmp_path)) + + +def test_contradiction_prompt_fires(monkeypatch, tmp_path): + """Acceptance (a): a contradiction / write-intent prompt → reminder fires.""" + _redirect_store(monkeypatch, tmp_path) + store.write_session_prompt( + _SID, "actually we're switching reorder to buttons — update reorder.ts" + ) + assert _fired(_run_hook(_payload())) + + +def test_compatible_add_tests_still_fires(monkeypatch, tmp_path): + """Acceptance (b) trade-off: 'add tests for X' carries 'add' → NOT suppressed. + Documented #170 decision (preserve #175 over suppressing compatible writes).""" + _redirect_store(monkeypatch, tmp_path) + store.write_session_prompt(_SID, "add tests for drag-to-reorder") + assert _fired(_run_hook(_payload())) + + +def test_read_only_prompt_suppressed(monkeypatch, tmp_path): + """Acceptance (c): a read-only / tangential prompt → reminder suppressed.""" + _redirect_store(monkeypatch, tmp_path) + store.write_session_prompt(_SID, "explain how the reorder flow works") + assert _run_hook(_payload()) == "" + + +def test_missing_prompt_file_fires(monkeypatch, tmp_path): + """Recall-bias fallback: no stored prompt → fire (never silently suppress).""" + _redirect_store(monkeypatch, tmp_path) + # no write_session_prompt for this session + assert _fired(_run_hook(_payload(session_id="sess-never-written"))) + + +def test_no_decisions_no_reminder(monkeypatch, tmp_path): + """Precondition preserved: fired but zero decisions → no reminder.""" + _redirect_store(monkeypatch, tmp_path) + store.write_session_prompt(_SID, "add the feature") + payload = _payload() + payload["tool_response"] = {"fired": True, "decisions": []} + assert _run_hook(payload) == "" + + +# ── store round-trip + cleanup + staleness sweep ───────────────────────── + + +def test_store_round_trip_and_cleanup(monkeypatch, tmp_path): + _redirect_store(monkeypatch, tmp_path) + store.write_session_prompt(_SID, "hello prompt") + assert store.read_session_prompt(_SID) == "hello prompt" + store.cleanup_session_prompt(_SID) + assert store.read_session_prompt(_SID) is None + + +def test_staleness_sweep_on_write(monkeypatch, tmp_path): + """A backdated orphan is swept on the next write; a fresh file survives.""" + _redirect_store(monkeypatch, tmp_path) + store.write_session_prompt("old-session", "stale") + old_file = tmp_path / store._DIR_NAME / "old-session.txt" + backdated = time.time() - (store._STALE_SECONDS + 3600) + import os + + os.utime(old_file, (backdated, backdated)) + store.write_session_prompt("new-session", "fresh") # triggers sweep + assert not old_file.exists() + assert (tmp_path / store._DIR_NAME / "new-session.txt").exists() + + +def test_session_end_hook_cleans_prompt(monkeypatch, tmp_path): + """The SessionEnd hook deletes the session prompt file (graceful path).""" + _redirect_store(monkeypatch, tmp_path) + store.write_session_prompt(_SID, "to be cleaned") + old_in = sys.stdin + sys.stdin = io.StringIO(json.dumps({"session_id": _SID})) + try: + session_end.main() + finally: + sys.stdin = old_in + assert store.read_session_prompt(_SID) is None diff --git a/tests/test_preflight_intent.py b/tests/test_preflight_intent.py index 4cbc4443..36124ea8 100644 --- a/tests/test_preflight_intent.py +++ b/tests/test_preflight_intent.py @@ -11,8 +11,10 @@ from scripts.hooks.preflight_intent import ( # noqa: E402 IMPLEMENTATION_VERBS, INDIRECT_INTENT_PHRASES, + READ_ONLY_PATTERNS, SKIP_PATTERNS, should_fire_preflight, + suppress_capture_reminder, ) @@ -68,3 +70,58 @@ def test_natural_contradiction_prompt(): def test_empty_prompt_does_not_fire(): assert not should_fire_preflight("") assert not should_fire_preflight(" \n\t") + + +# ── #170 — capture-reminder suppression gate ───────────────────────────── + + +def test_suppress_on_read_only_prompts(): + """Read-only / informational prompts with no implementation verb suppress.""" + read_only = ( + "explain how the reorder flow works", + "how does the session cache invalidate?", + "review the auth middleware", + "summarize the drag-to-reorder decision", + "walk me through the rate limiter", + "what is the current retry policy?", + ) + for prompt in read_only: + assert suppress_capture_reminder(prompt), f"{prompt!r} should suppress" + + +def test_no_suppress_on_implementation_verb(): + """ANY implementation verb fires (never suppress) — incl. the #175 smuggled- + refinement case. R1: also assert a NON-'add' verb to lock the regex, not the + literal token 'add'.""" + must_fire = ( + # the exact audit VETO example — compatible verb + smuggled structural refinement + "add tests for the reorder flow, and expose it as a programmatic API", + # R1 — different implementation verb, same smuggled-refinement shape + "refactor the reorder flow and expose it as a programmatic API", + "update cherry-pick.ts to use buttons instead of drag", + ) + for prompt in must_fire: + assert not suppress_capture_reminder(prompt), f"{prompt!r} must fire (not suppress)" + + +def test_no_suppress_acceptance_b_compatible_write(): + """Documented #170 trade-off: 'add tests for X' carries an impl verb ('add'), + so it is NOT suppressed — acceptance case (b) is intentionally not met to + preserve the #175 no-data-loss invariant.""" + assert not suppress_capture_reminder("add tests for drag-to-reorder") + + +def test_no_suppress_contradiction_prompt(): + """R3 — the Flow-2 contradiction prompt must never be suppressed.""" + prompt = ( + "I know the roadmap said drag-and-drop to reorder commits, " + "but actually we're switching to a text-editor approach. " + "Please update cherry-pick.ts and reorder.ts." + ) + assert not suppress_capture_reminder(prompt) + + +def test_suppress_empty_and_data_loadable(): + assert not suppress_capture_reminder("") + assert not suppress_capture_reminder(" \n\t") + assert isinstance(READ_ONLY_PATTERNS, tuple) and len(READ_ONLY_PATTERNS) >= 8