BicameralAI · jinhongkuan · Apr 14, 2026 · Apr 14, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,31 @@
 All notable changes to bicameral-mcp are tracked here. Format loosely follows
 [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
+## 0.4.7 — 2026-04-14 — FC-3 Vocab Cache Similarity Gate
+
+Fixes witnessed cross-contamination where the vocab cache reused an unrelated
+intent's code regions — and, worse, labeled them with the original intent's
+`purpose` text. Observed live on Accountable 2026-04-14: a "Stripe payment-link
+fallback" decision inherited 8 bogus regions from an earlier "weekly bulletin
+page" ingest because both descriptions shared incidental tokens.
+
+### Fixed
+
+- **FC-3a — Vocab cache BM25 cross-match.** `lookup_vocab_cache` now returns
+  `(symbols, matched_query_text)`. `handle_ingest` compares the description to
+  that text (Jaccard over non-stopword tokens of 4+ letters) and discards hits
+  below 0.5, forcing a fall-through to fresh grounding via `ground_mappings`.
+  Deterministic, no LLM in the critical indexing path (per `git-for-specs.md`).
+- **FC-3b — Stale `purpose` field on reused regions.** `_validate_cached_regions`
+  now accepts `current_description` and rewrites every returned region's
+  `purpose` field so reused regions carry the *current* intent's text, not the
+  cached one's.
+
+### Migration
+
+No manual action required. `v0.4.6 → v0.4.7` is a handler-layer fix. Existing
+vocab_cache rows remain valid; the gate rejects false positives on read.
+
 ## 0.4.6 — 2026-04-14 — Adoption Floor (Trust + First Wow)
 
 Five initiatives: FC-1 BM25 degeneracy guard, FC-2 multi-region grounding

diff --git a/events/team_adapter.py b/events/team_adapter.py
@@ -86,8 +86,15 @@ async def upsert_source_cursor(
             error=error,
         )
 
-    async def lookup_vocab_cache(self, query_text: str, repo: str) -> list[dict]:
-        """Vocab cache is local bookkeeping — no event emitted."""
+    async def lookup_vocab_cache(
+        self, query_text: str, repo: str,
+    ) -> tuple[list[dict], str]:
+        """Vocab cache is local bookkeeping — no event emitted.
+
+        Returns what ``self._inner.lookup_vocab_cache`` returns, unchanged:
+        ``(symbols, matched_query_text)``. The second element is the stored
+        ``query_text`` of the top cache hit, used for FC-3 similarity gating.
+        """
         await self._ensure_ready()
         return await self._inner.lookup_vocab_cache(query_text, repo)
 

diff --git a/handlers/ingest.py b/handlers/ingest.py
@@ -80,8 +80,64 @@ def _normalize_payload(payload: dict) -> dict:
     return result
 
 
+# ── FC-3: vocab cache similarity gate ──────────────────────────────
+#
+# The vocab cache uses SurrealDB's ``@0@`` BM25 full-text operator to match
+# incoming descriptions against stored ``query_text``. Without a similarity
+# threshold, two unrelated intents sharing incidental tokens cross-match —
+# witnessed live on Accountable 2026-04-14 where a "Stripe payment-link
+# fallback" decision inherited 8 bogus regions from an earlier "weekly
+# bulletin page" ingest.
+#
+# The gate below computes Jaccard similarity over non-stopword tokens ≥4
+# chars. Cache hits below the threshold are discarded, forcing the caller
+# to fall through to fresh grounding (which is already correct, per FC-2).
+# Jaccard was chosen over embeddings because:
+#   1. Deterministic, no model dependency (git-for-specs.md invariant:
+#      "no LLM in critical indexing path")
+#   2. The downstream ground_mappings pipeline already handles semantic
+#      variation via BM25+graph fusion — an embedding gate here would
+#      double-count
+#   3. 20 LOC vs 200+ LOC with a new dependency
+
+_VOCAB_SIMILARITY_THRESHOLD = 0.5  # handle_ingest rejects cache hits scoring below this
+
+_VOCAB_STOPWORDS = frozenset({  # words _content_tokens drops before comparison
+    "the", "and", "for", "that", "this", "with", "are", "from", "have",
+    "will", "when", "then", "been", "also", "into", "about", "should",
+    "must", "need", "each", "they", "their", "there", "which", "where",
+    "what", "than", "some", "more", "such", "only", "very", "just",
+    "like", "make", "made", "use", "used", "using", "after", "before",
+    "over", "under", "between", "through", "against",
+})  # NOTE: 3-letter entries are inert — _content_tokens only emits 4+ letter tokens
+
+
+def _content_tokens(text: str) -> set[str]:
+    """Distinct lowercased runs of 4+ ASCII letters in ``text``, minus stopwords."""
+    import re
+    words = re.findall(r"[A-Za-z]{4,}", text or "")  # falsy text is treated as ""
+    return set(map(str.lower, words)) - _VOCAB_STOPWORDS
+
+
+def _jaccard_similarity(a: str, b: str) -> float:
+    """Score the content-token overlap of two strings, from 0.0 to 1.0.
+
+    Both strings are reduced to ``_content_tokens`` sets and the result is
+    ``|A ∩ B| / |A ∪ B|``. An empty token set on either side scores 0.0
+    (so two empty strings score 0.0, not 1.0); equal sets score 1.0.
+    """
+    tokens_a, tokens_b = _content_tokens(a), _content_tokens(b)
+    if not (tokens_a and tokens_b):
+        return 0.0
+    shared = len(tokens_a & tokens_b)
+    # |A ∪ B| = |A| + |B| - |A ∩ B|, so the union set need not be built.
+    return shared / (len(tokens_a) + len(tokens_b) - shared)
+
+
 def _validate_cached_regions(
-    regions: list[dict], code_graph,
+    regions: list[dict],
+    code_graph,
+    current_description: str = "",
 ) -> list[dict]:
     """Check cached code_regions against the live symbol index.
 
@@ -95,6 +151,13 @@ def _validate_cached_regions(
 
     When lookup_by_name returns multiple rows, prefers the row matching
     the cached region's file_path to avoid picking an unrelated symbol.
+
+    v0.4.7 (FC-3): when ``current_description`` is non-empty, the returned
+    region's ``purpose`` field is rewritten to it. Previously this function
+    preserved the cached region's stale ``purpose`` (= the ORIGINAL
+    intent's description), cross-wiring intents so one decision's regions
+    carried another decision's label. Witnessed live on Accountable
+    2026-04-14.
     """
     try:
         code_graph._ensure_initialized()
@@ -120,13 +183,16 @@ def _validate_cached_regions(
             (r for r in rows if r["file_path"] == cached_file),
             rows[0],
         )
-        valid.append({
+        entry = {
             **region,
             "file_path": row["file_path"],
             "start_line": row["start_line"],
             "end_line": row["end_line"],
             "type": row["type"],
-        })
+        }
+        if current_description:
+            entry["purpose"] = current_description  # FC-3: rewrite stale purpose
+        valid.append(entry)
     return valid
 
 
@@ -155,6 +221,7 @@ async def handle_ingest(
     # Runs before ground_mappings — a hit skips the full BM25 pipeline.
     mappings_to_ground = payload.get("mappings") or []
     cache_hits = 0
+    cache_similarity_rejections = 0
     pre_grounded: set[str] = set()
     for mapping in mappings_to_ground:
         if mapping.get("code_regions"):
@@ -167,19 +234,36 @@ async def handle_ingest(
         if not description:
             continue
         try:
-            cached_symbols = await ledger.lookup_vocab_cache(description, repo)
+            cached_symbols, matched_query_text = await ledger.lookup_vocab_cache(description, repo)
             if cached_symbols:
+                # FC-3 similarity gate: the vocab cache lookup uses SurrealDB's
+                # BM25 @0@ operator, which is too loose on its own. Two unrelated
+                # intents sharing incidental tokens can cross-match. Compute
+                # Jaccard similarity between the incoming description and the
+                # matched query_text, and reject the cache hit if it's below
+                # threshold. Falls through to fresh grounding via ground_mappings.
+                similarity = _jaccard_similarity(description, matched_query_text)
+                if similarity < _VOCAB_SIMILARITY_THRESHOLD:
+                    cache_similarity_rejections += 1
+                    logger.info(
+                        "[ingest] vocab cache rejected (similarity %.2f < %.2f): "
+                        "current=%r matched=%r",
+                        similarity, _VOCAB_SIMILARITY_THRESHOLD,
+                        description[:60], matched_query_text[:60],
+                    )
+                    continue
                 valid_regions = _validate_cached_regions(
                     cached_symbols, ctx.code_graph,
+                    current_description=description,  # FC-3: rewrite purpose
                 )
                 if valid_regions:
                     mapping["code_regions"] = valid_regions
                     cache_hits += 1
                     pre_grounded.add(description)
                     logger.info(
-                        "[ingest] vocab cache hit for '%s' (%d/%d regions valid)",
+                        "[ingest] vocab cache hit for '%s' (%d/%d regions valid, sim=%.2f)",
                         description[:60],
-                        len(valid_regions), len(cached_symbols),
+                        len(valid_regions), len(cached_symbols), similarity,
                     )
                 else:
                     logger.debug(

diff --git a/ledger/adapter.py b/ledger/adapter.py
@@ -124,8 +124,13 @@ async def lookup_vocab_cache(
         self,
         query_text: str,
         repo: str,
-    ) -> list[dict]:
-        """Check vocab_cache for cached grounding results."""
+    ) -> tuple[list[dict], str]:
+        """Check vocab_cache for cached grounding results.
+
+        Returns ``(symbols, matched_query_text)``. The matched query text
+        is needed by callers to run the FC-3 similarity gate before
+        deciding whether to reuse the cached symbols.
+        """
         await self._ensure_connected()
         return await lookup_vocab_cache(self._client, query_text, repo)
 

diff --git a/ledger/queries.py b/ledger/queries.py
@@ -222,13 +222,20 @@ async def lookup_vocab_cache(
     query_text: str,
     repo: str,
     max_results: int = 3,
-) -> list[dict]:
+) -> tuple[list[dict], str]:
     """BM25 lookup on vocab_cache for cached grounding results.
 
-    Returns the ``symbols`` array from the top matching cache entry
-    (a list of code_region-shaped dicts). Empty list on miss.
+    Returns a 2-tuple: ``(symbols, matched_query_text)``.
+      - ``symbols`` is the cached code_region-shaped dict list from the
+        top matching cache entry, or ``[]`` on miss.
+      - ``matched_query_text`` is the ``query_text`` that the top hit was
+        originally stored against. The caller uses this to compute a
+        similarity gate (FC-3 fix) before deciding whether to reuse the
+        cached symbols — BM25's ``@0@`` operator is too loose on its
+        own and cross-contaminates unrelated intents.
 
     On hit, increments hit_count and refreshes last_hit for LRU tracking.
+    On miss, returns ``([], "")``.
     """
     rows = await client.query(
         """
@@ -241,7 +248,7 @@ async def lookup_vocab_cache(
         {"query": query_text, "repo": repo, "max_results": max_results},
     )
     if not rows:
-        return []
+        return [], ""
 
     top = rows[0]
     top_id = top.get("id")
@@ -250,7 +257,7 @@ async def lookup_vocab_cache(
             f"UPDATE {top_id} SET hit_count += 1, last_hit = time::now()",
         )
 
-    return top.get("symbols") or []
+    return top.get("symbols") or [], str(top.get("query_text") or "")
 
 
 async def upsert_vocab_cache(

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "bicameral-mcp"
-version = "0.4.6"
+version = "0.4.7"
 description = "Decision ledger MCP server — ingests meeting transcripts, maps decisions to code, tracks drift"
 readme = "README.md"
 requires-python = ">=3.10"