Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions scripts/hooks/post_preflight_capture_reminder.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@

import json
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from hooks.preflight_intent import suppress_capture_reminder # noqa: E402
from hooks.session_prompt_store import read_session_prompt # noqa: E402

PREFLIGHT_TOOL_NAME = "mcp__bicameral__bicameral_preflight"

Expand Down Expand Up @@ -133,6 +138,12 @@ def main() -> int:
decisions = response.get("decisions") or []
if not isinstance(decisions, list) or not decisions:
return 0
# #170 — suppress on read-only/informational prompts. Recall-biased: a
# missing/unreadable prompt or any implementation-verb prompt fires, so a
# refinement (smuggled or not) is never silently dropped (#175 invariant).
prompt = read_session_prompt(str(payload.get("session_id") or "")) or ""
if suppress_capture_reminder(prompt):
return 0
json.dump(
{
"hookSpecificOutput": {
Expand Down
53 changes: 53 additions & 0 deletions scripts/hooks/preflight_intent.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,56 @@ def should_fire_preflight(prompt: str) -> bool:
return True
lowered = prompt.lower()
return any(phrase in lowered for phrase in INDIRECT_INTENT_PHRASES)


# #170 — read-only / informational intent. Used by the post-preflight capture
# reminder gate to suppress the Step 5.6 nudge on prompts that cannot be
# implementing a refinement. Deliberately NARROW and applied only AFTER the
# implementation-verb check (see suppress_capture_reminder): any prompt with a
# write verb is never suppressed, so a refinement smuggled under a compatible
# verb ("add tests ... and expose X as a programmatic API") always fires the
# reminder. This preserves the #175 invariant that contradiction judgment is
# never decided by a lexical scan over a write-intent prompt.
READ_ONLY_PATTERNS: tuple[re.Pattern[str], ...] = (
re.compile(r"\bexplain\b", re.IGNORECASE),
re.compile(r"\bhow (?:does|do|did|can|would|is|are)\b", re.IGNORECASE),
re.compile(r"\bwhat(?:'s| is| are| does| do)\b", re.IGNORECASE),
re.compile(r"\bwhy (?:does|do|did|is|are)\b", re.IGNORECASE),
re.compile(r"\bdescribe\b", re.IGNORECASE),
re.compile(r"\bsummar(?:ize|ise|y)\b", re.IGNORECASE),
re.compile(r"\breview\b", re.IGNORECASE),
re.compile(r"\bshow me\b", re.IGNORECASE),
re.compile(r"\bwalk me through\b", re.IGNORECASE),
re.compile(r"\btell me about\b", re.IGNORECASE),
re.compile(r"\bwhere (?:is|are|does|do)\b", re.IGNORECASE),
re.compile(r"\bunderstand\b", re.IGNORECASE),
re.compile(r"\blook at\b", re.IGNORECASE),
)


def suppress_capture_reminder(prompt: str) -> bool:
"""Return True iff the post-preflight capture reminder should be SUPPRESSED.

#170: the reminder (Step 5.6 of bicameral-preflight) is otherwise injected on
every preflight call that surfaces >=1 decision, asking the user to
disambiguate a possible refinement. That is spam on read-only / informational
prompts where no refinement exists.

Recall-biased gate (operator-selected "read-only-only" suppression, preserving
the #175 no-data-loss invariant):
1. empty / whitespace prompt -> False (fire; uncertain)
2. ANY implementation verb present -> False (fire; could be implementing a
refinement, smuggled or not — never suppress a write-intent prompt)
3. otherwise, suppress iff a read-only / informational signal is present.

Because step 2 runs before step 3, the read-only patterns only ever evaluate
verb-free prompts — so their loose matching can only suppress prompts that
cannot be performing an edit this turn. NOTE: this intentionally does NOT
reuse should_fire_preflight, whose SKIP_PATTERNS (e.g. ``add ... test``) would
misclassify write prompts as non-firing and reopen the #175 hole.
"""
if not prompt or not prompt.strip():
return False
if _VERB_REGEX.search(prompt):
return False
return any(pat.search(prompt) for pat in READ_ONLY_PATTERNS)
6 changes: 6 additions & 0 deletions scripts/hooks/preflight_reminder.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from hooks.preflight_intent import should_fire_preflight # noqa: E402
from hooks.session_prompt_store import write_session_prompt # noqa: E402

REMINDER_TEXT = (
"<system-reminder>\n"
Expand Down Expand Up @@ -76,6 +77,11 @@ def main() -> int:
except (json.JSONDecodeError, ValueError):
return 0
prompt = payload.get("prompt", "") if isinstance(payload, dict) else ""
# #170 — hand the prompt off to the PostToolUse capture-reminder gate, which
# cannot see the prompt from its own payload. Write regardless of fire intent
# (a read-only prompt the agent still preflights on must be classifiable).
session_id = payload.get("session_id", "") if isinstance(payload, dict) else ""
write_session_prompt(str(session_id or ""), prompt)
if should_fire_preflight(prompt):
json.dump(
{
Expand Down
10 changes: 9 additions & 1 deletion scripts/hooks/session_end_queue_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
from pathlib import Path

sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from hooks.session_prompt_store import cleanup_session_prompt # noqa: E402

from events.transcript_queue import write_pending # noqa: E402


Expand All @@ -34,7 +37,12 @@ def main() -> int:
return 0
if not isinstance(payload, dict):
return 0
session_id = str(payload.get("session_id") or uuid.uuid4())
session_id_raw = str(payload.get("session_id") or "")
# #170 — best-effort cleanup of the prompt handoff file, before the
# transcript early-return so it runs even when no transcript is queued.
if session_id_raw:
cleanup_session_prompt(session_id_raw)
session_id = session_id_raw or str(uuid.uuid4())
transcript_path = str(payload.get("transcript_path", "")).strip()
cwd = str(payload.get("cwd", "")).strip()
if not transcript_path or not cwd:
Expand Down
92 changes: 92 additions & 0 deletions scripts/hooks/session_prompt_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
"""Session-scoped prompt handoff for the preflight hook pair (#170).

The PostToolUse capture-reminder gate needs the user's prompt to classify it,
but PostToolUse payloads carry only ``tool_name``/``tool_input``/``tool_response``
— not the prompt. The UserPromptSubmit hook (which DOES receive the prompt)
writes it here keyed by Claude Code ``session_id``; the PostToolUse hook reads it
back; the SessionEnd hook deletes it.

All operations are best-effort and never raise — a broken handoff degrades to
"fire the reminder" (recall-biased), never to a crash that blocks the user.

Storage: ``<tempdir>/bicameral-prompts/<session_id>.txt``. To bound orphans from
sessions that crash without firing SessionEnd, ``write_session_prompt`` also
sweeps files older than ``_STALE_SECONDS`` on each write.
"""

from __future__ import annotations

import re
import tempfile
import time
from pathlib import Path

_DIR_NAME = "bicameral-prompts"
_STALE_SECONDS = 24 * 60 * 60 # 24h — orphan-file staleness bound
_SAFE_SESSION = re.compile(r"[^A-Za-z0-9_.-]")


def _store_dir() -> Path:
return Path(tempfile.gettempdir()) / _DIR_NAME


def _path_for(session_id: str) -> Path | None:
sid = _SAFE_SESSION.sub("_", session_id.strip())
if not sid:
return None
return _store_dir() / f"{sid}.txt"


def _sweep_stale(now: float | None = None) -> None:
"""Delete prompt files older than the staleness bound. Best-effort."""
now = time.time() if now is None else now
try:
for f in _store_dir().glob("*.txt"):
try:
if now - f.stat().st_mtime > _STALE_SECONDS:
f.unlink()
except OSError:
continue
except OSError:
return


def write_session_prompt(session_id: str, prompt: str) -> None:
"""Persist ``prompt`` for ``session_id`` and sweep stale orphans."""
if not session_id or not prompt:
return
path = _path_for(session_id)
if path is None:
return
try:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(prompt, encoding="utf-8")
except OSError:
return
_sweep_stale()


def read_session_prompt(session_id: str) -> str | None:
"""Return the stored prompt for ``session_id``, or None if unavailable."""
if not session_id:
return None
path = _path_for(session_id)
if path is None:
return None
try:
return path.read_text(encoding="utf-8")
except OSError:
return None


def cleanup_session_prompt(session_id: str) -> None:
"""Delete the stored prompt for ``session_id`` (graceful SessionEnd path)."""
if not session_id:
return
path = _path_for(session_id)
if path is None:
return
try:
path.unlink()
except OSError:
return
17 changes: 16 additions & 1 deletion skills/bicameral-preflight/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -515,14 +515,29 @@ Narrate one line: *"Captured refinement: '<paraphrase>' — wired as
#### Hook reinforcement

A PostToolUse hook scoped to `mcp__bicameral__bicameral_preflight` injects a
`<system-reminder>` after every preflight call that surfaces ≥1 decision. The
`<system-reminder>` after a preflight call that surfaces ≥1 decision. The
reminder templates Step 5.6.1's `AskUserQuestion` shape with the surfaced
`decision_id` + description filled in, so the question fires reliably even
when the agent's natural inclination would be to skip the disambiguation.
Source: `scripts/hooks/post_preflight_capture_reminder.py`; wired by
`setup_wizard._install_claude_hooks` and the e2e harness's
`materialize_settings_with_hooks`.

**Read-only suppression gate (#170).** The reminder is suppressed for
unambiguously read-only / informational prompts (e.g. "explain how the reorder
flow works", "review the session module") where no refinement can exist. The
gate is **recall-biased and deliberately narrow**: it suppresses ONLY when the
prompt carries a read-only signal AND **no** implementation verb
(`suppress_capture_reminder` in `scripts/hooks/preflight_intent.py`). Any
write-intent prompt — including a refinement smuggled under a compatible verb
("add tests … and expose X as a programmatic API") — always fires, preserving
the #175 invariant that contradiction is never decided by a lexical scan over a
write prompt. The prompt is handed from the UserPromptSubmit hook to this hook
via `scripts/hooks/session_prompt_store.py` (session-scoped, swept after 24h and
deleted on SessionEnd). Consequence: a compatible-write prompt like "add tests
for drag-to-reorder" still fires the disambiguation (it cannot be lexically told
apart from a smuggled refinement) — accepted by design over the data-loss risk.

### 6. Honor blocking hints (guided mode vs normal mode)

The agent's `guided_mode` setting controls whether action hints are
Expand Down
134 changes: 134 additions & 0 deletions tests/test_post_preflight_capture_reminder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""Integration tests for the #170 capture-reminder suppression gate.

Drives the real PostToolUse hook ``main()`` with JSON payloads on stdin and a
real (tempdir-redirected) session prompt file written by the real
``session_prompt_store`` — no mocks of the classifier or the store. Covers the
three #170 acceptance fixtures, the recall-bias fallback (missing prompt → fire),
and the cleanup/staleness story.
"""

from __future__ import annotations

import io
import json
import sys
import tempfile
import time
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(REPO_ROOT))
sys.path.insert(0, str(REPO_ROOT / "scripts"))

from scripts.hooks import post_preflight_capture_reminder as hook # noqa: E402
from scripts.hooks import session_end_queue_writer as session_end # noqa: E402
from scripts.hooks import session_prompt_store as store # noqa: E402

_SID = "sess-170-test"
_DECISIONS = [{"decision_id": "decision:abc", "description": "Drag-to-reorder commits"}]


def _run_hook(payload: dict) -> str:
"""Invoke the hook main() with payload on stdin; return captured stdout."""
old_in, old_out = sys.stdin, sys.stdout
sys.stdin = io.StringIO(json.dumps(payload))
sys.stdout = io.StringIO()
try:
hook.main()
return sys.stdout.getvalue()
finally:
sys.stdin, sys.stdout = old_in, old_out


def _payload(session_id: str = _SID) -> dict:
return {
"tool_name": "mcp__bicameral__bicameral_preflight",
"tool_response": {"fired": True, "decisions": _DECISIONS},
"session_id": session_id,
}


def _fired(stdout: str) -> bool:
return "system-reminder" in stdout and "AskUserQuestion" in stdout


def _redirect_store(monkeypatch, tmp_path) -> None:
monkeypatch.setattr(tempfile, "gettempdir", lambda: str(tmp_path))


def test_contradiction_prompt_fires(monkeypatch, tmp_path):
"""Acceptance (a): a contradiction / write-intent prompt → reminder fires."""
_redirect_store(monkeypatch, tmp_path)
store.write_session_prompt(
_SID, "actually we're switching reorder to buttons — update reorder.ts"
)
assert _fired(_run_hook(_payload()))


def test_compatible_add_tests_still_fires(monkeypatch, tmp_path):
"""Acceptance (b) trade-off: 'add tests for X' carries 'add' → NOT suppressed.
Documented #170 decision (preserve #175 over suppressing compatible writes)."""
_redirect_store(monkeypatch, tmp_path)
store.write_session_prompt(_SID, "add tests for drag-to-reorder")
assert _fired(_run_hook(_payload()))


def test_read_only_prompt_suppressed(monkeypatch, tmp_path):
"""Acceptance (c): a read-only / tangential prompt → reminder suppressed."""
_redirect_store(monkeypatch, tmp_path)
store.write_session_prompt(_SID, "explain how the reorder flow works")
assert _run_hook(_payload()) == ""


def test_missing_prompt_file_fires(monkeypatch, tmp_path):
"""Recall-bias fallback: no stored prompt → fire (never silently suppress)."""
_redirect_store(monkeypatch, tmp_path)
# no write_session_prompt for this session
assert _fired(_run_hook(_payload(session_id="sess-never-written")))


def test_no_decisions_no_reminder(monkeypatch, tmp_path):
"""Precondition preserved: fired but zero decisions → no reminder."""
_redirect_store(monkeypatch, tmp_path)
store.write_session_prompt(_SID, "add the feature")
payload = _payload()
payload["tool_response"] = {"fired": True, "decisions": []}
assert _run_hook(payload) == ""


# ── store round-trip + cleanup + staleness sweep ─────────────────────────


def test_store_round_trip_and_cleanup(monkeypatch, tmp_path):
_redirect_store(monkeypatch, tmp_path)
store.write_session_prompt(_SID, "hello prompt")
assert store.read_session_prompt(_SID) == "hello prompt"
store.cleanup_session_prompt(_SID)
assert store.read_session_prompt(_SID) is None


def test_staleness_sweep_on_write(monkeypatch, tmp_path):
"""A backdated orphan is swept on the next write; a fresh file survives."""
_redirect_store(monkeypatch, tmp_path)
store.write_session_prompt("old-session", "stale")
old_file = tmp_path / store._DIR_NAME / "old-session.txt"
backdated = time.time() - (store._STALE_SECONDS + 3600)
import os

os.utime(old_file, (backdated, backdated))
store.write_session_prompt("new-session", "fresh") # triggers sweep
assert not old_file.exists()
assert (tmp_path / store._DIR_NAME / "new-session.txt").exists()


def test_session_end_hook_cleans_prompt(monkeypatch, tmp_path):
"""The SessionEnd hook deletes the session prompt file (graceful path)."""
_redirect_store(monkeypatch, tmp_path)
store.write_session_prompt(_SID, "to be cleaned")
old_in = sys.stdin
sys.stdin = io.StringIO(json.dumps({"session_id": _SID}))
try:
session_end.main()
finally:
sys.stdin = old_in
assert store.read_session_prompt(_SID) is None
Loading
Loading