Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 19 additions & 10 deletions .github/workflows/v0-user-flow-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,14 @@ jobs:
assertions:
name: e2e assertions (auto)
runs-on: ubuntu-latest
# production env provides CLAUDE_CODE_OAUTH_TOKEN. No required reviewers
# on this env → PR triggers flow through automatically.
environment: production
# ci-test env provides ANTHROPIC_API_KEY. No required reviewers on this
# env → PR triggers flow through automatically. Switched from the
# `production` env's CLAUDE_CODE_OAUTH_TOKEN after #528: the org
# subscription was disabled (oauth_org_not_allowed 403), and `claude -p`
# honours ANTHROPIC_API_KEY directly — decoupling CI from subscription
# policy. Same env already used by test-mcp-regression.yml and
# preflight-eval.yml.
environment: ci-test
timeout-minutes: 25
steps:
- uses: actions/checkout@v4
Expand Down Expand Up @@ -85,22 +90,26 @@ jobs:
test -f docs/process/roadmap.md
test -f app/src/lib/git/cherry-pick.ts

- name: Claude Code OAuth token visibility probe
# ANTHROPIC_API_KEY (NOT CLAUDE_CODE_OAUTH_TOKEN) — #528: the org
# subscription tied to the OAuth token was disabled, surfacing as
# oauth_org_not_allowed 403 on every flow before turn 1. `claude -p`
# honours ANTHROPIC_API_KEY natively, sidestepping subscription policy.
- name: Anthropic API key visibility probe
run: |
set +e
if [ -n "${CLAUDE_CODE_OAUTH_TOKEN}" ]; then
echo "CLAUDE_CODE_OAUTH_TOKEN: present (length=${#CLAUDE_CODE_OAUTH_TOKEN})"
if [ -n "${ANTHROPIC_API_KEY}" ]; then
echo "ANTHROPIC_API_KEY: present (length=${#ANTHROPIC_API_KEY})"
else
echo "CLAUDE_CODE_OAUTH_TOKEN: EMPTY or UNSET"
echo " secret expression non-empty: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN != '' }}"
echo "ANTHROPIC_API_KEY: EMPTY or UNSET"
echo " secret expression non-empty: ${{ secrets.ANTHROPIC_API_KEY != '' }}"
exit 1
fi
env:
CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}

- name: Run v0 user flow e2e (assertion-only, blocking)
env:
CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
run: python tests/e2e/run_e2e_flows.py

- name: Upload e2e transcripts
Expand Down
15 changes: 15 additions & 0 deletions code_locator/indexing/sqlite_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,21 @@ def get_qualified_names_in_files(self, file_paths: list[str]) -> list[str]:
).fetchall()
return [r[0] for r in rows]

def read_index_meta(self, key: str) -> str:
    """Return the ``index_meta`` value stored under *key*, or ``""``.

    The ``index_meta`` table is written by
    ``code_locator_runtime.record_index_state``. It lives in the same SQLite
    file as the symbol tables, so SymbolDB readers can consume it without
    round-tripping through the runtime module.

    Args:
        key: Metadata key to look up (for example ``"head_commit"``).

    Returns:
        The stored value. ``""`` is returned when the query raises
        ``sqlite3.OperationalError`` (e.g. the table does not exist because
        the index is a legacy build or its state has not been recorded yet),
        when no row matches *key*, or when the stored value is NULL.
    """
    conn = self._connect()
    try:
        row = conn.execute(
            "SELECT value FROM index_meta WHERE key = ?", (key,)
        ).fetchone()
    except sqlite3.OperationalError:
        # Best-effort read: an unreadable/missing table means "provenance
        # unknown" to callers, not a failure.
        return ""
    if row is None or row[0] is None:
        # Missing key or NULL value: honour the declared ``str`` return type.
        return ""
    return row[0]

def get_top_symbols_by_connectivity(self, limit: int = 20) -> list[dict]:
"""Return symbols with the most edges (most connected = most important)."""
conn = self._connect()
Expand Down
12 changes: 12 additions & 0 deletions code_locator/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,18 @@ class ValidatedSymbol(BaseModel):
default="rapidfuzz_validate",
description="How this symbol was found: keyword_extract, llm_propose, etc.",
)
indexed_at_sha: str = Field(
default="",
description=(
"Git HEAD commit the symbol index was built against. Empty when the "
"index pre-dates ref tracking. Compare against authoritative_sha "
"before bind to detect snapshot drift (#334)."
),
)
indexed_at_path: str = Field(
default="",
description="Absolute repo path the index was built against. Empty when unknown.",
)


# ── Retrieval ────────────────────────────────────────────────────────
Expand Down
9 changes: 9 additions & 0 deletions code_locator/tools/validate_symbols.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ def __init__(self, db: SymbolDB, config: CodeLocatorConfig) -> None:
self.config = config
# Cache symbol list at init (not per-call)
self._symbols: list[tuple[int, str, str]] = db.get_all_symbol_names()
# Cache index provenance at init — head_commit + repo_path are
# written by code_locator_runtime.record_index_state after every
# rebuild and don't change between rebuilds. Exposed on every
# ValidatedSymbol so callers can detect snapshot drift vs
# authoritative_sha before bind (#334).
self._indexed_at_sha: str = db.read_index_meta("head_commit")
self._indexed_at_path: str = db.read_index_meta("repo_path")

def execute(self, args: dict) -> list[ValidatedSymbol]:
candidates = args.get("candidates", [])
Expand Down Expand Up @@ -113,6 +120,8 @@ def _fuzzy_match(self, candidate: str) -> list[ValidatedSymbol]:
match_score=score,
symbol_id=sid,
bridge_method="rapidfuzz_validate",
indexed_at_sha=self._indexed_at_sha,
indexed_at_path=self._indexed_at_path,
)
for sid, name, qn, score in reranked[:max_matches]
]
34 changes: 33 additions & 1 deletion skills/bicameral-bind/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@ The handler rejects unverified bindings (#280). To avoid the rejection path,
actually implements the decision's intent.
3. **Confirm the symbol via `validate_symbols`.** A grep match is not proof
of existence in the symbol index; the index is the source of truth and is
what the handler queries to verify your binding.
what the handler queries to verify your binding. Each result now includes
`indexed_at_sha` — the git commit the symbol index was built against.
**Compare it against the `authoritative_sha` from your most recent
`link_commit` response.** If they differ, the index is ahead of (or behind)
the ref the bind handler will validate at, and bind may reject a symbol
that `validate_symbols` confirmed — see "Snapshot drift" below.
4. **If the candidate is ambiguous (multiple symbols match the intent),
`get_neighbors` to resolve scope** before binding. Surfaces callers and
callees so you can tell whether the decision is local to one function or
Expand All @@ -50,6 +55,33 @@ The handler rejects unverified bindings (#280). To avoid the rejection path,
do NOT bind. Leave the decision ungrounded — a future ingest or
`bicameral_bind` call can pin it later.

## Snapshot drift (#334)

`validate_symbols` reads from the local SQLite symbol index (built and stamped
by `code_locator_runtime.record_index_state`). `bicameral_bind` resolves
symbols via `git show {authoritative_sha}:{file_path}` + tree-sitter. These
are two different data sources at two different refs — when they disagree, a
caller can satisfy `validate_symbols` (score 100) and still hit a hard
rejection from `bind`.

Each `validate_symbols` result now carries `indexed_at_sha`. Use it:

- **If `indexed_at_sha == authoritative_sha`:** safe to proceed — the index
was built against the same commit bind will validate at.
- **If `indexed_at_sha` differs from `authoritative_sha`:** the index may have
drifted from the ref that bind validates against. Proceed only when you have
independent evidence the symbol exists at `authoritative_sha` (Read the file
at that ref; check `git log` for the introducing commit). Prefer re-indexing
(`python -m code_locator index <repo_path>`) over guessing.
- **If `indexed_at_sha == ""`:** the index pre-dates ref tracking (legacy
build, or a `record_index_state` call was skipped). Treat as snapshot-
unknown — same caution as the drift case.

Skipping this check is what caused the field bug in #334 (Jacob, 2026-05):
`validate_symbols` returned `score=100` for a symbol introduced on a feature
branch after the most recent `link_commit`. The caller then bound it and got
`"not found at <authoritative_sha>"`.

## Anti-patterns — REJECT these

| Anti-pattern | Why it fails |
Expand Down
4 changes: 3 additions & 1 deletion tests/e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ GitHub Actions workflow: `.github/workflows/v0-user-flow-e2e.yml`.

- Triggers on PRs touching `tests/e2e/**`, `handlers/**`, `ledger/**`,
`contracts.py`, `skills/bicameral-*/**`, or the workflow itself.
- Runs in the `production` GitHub environment for `CLAUDE_CODE_OAUTH_TOKEN`.
- Runs in the `ci-test` GitHub environment for `ANTHROPIC_API_KEY`
(switched from `production` + `CLAUDE_CODE_OAUTH_TOKEN` in #528 after the
org subscription was disabled).
- Pinned `desktop/desktop` commit in the workflow file (update by editing
the env var).
- Uploads `test-results/e2e/*.ndjson` as job artifacts (30-day retention)
Expand Down
7 changes: 5 additions & 2 deletions tests/e2e/run_e2e_flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@
remains useful for fast dev iteration on handler logic.

Required env:
CLAUDE_CODE_OAUTH_TOKEN Claude Code CLI auth (set by GitHub Actions
``production`` environment in CI).
ANTHROPIC_API_KEY Claude Code CLI auth — `claude -p` honours this
directly (set by GitHub Actions ``ci-test``
environment in CI). Switched from
CLAUDE_CODE_OAUTH_TOKEN in #528 after the org
subscription was disabled (oauth_org_not_allowed).
DESKTOP_REPO_PATH Path to a local clone of github.com/desktop/desktop.

CI: see .github/workflows/v0-user-flow-e2e.yml.
Expand Down
141 changes: 141 additions & 0 deletions tests/test_validate_symbols_indexed_at_sha.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
"""Sociable test for #334 option C lite — validate_symbols returns
the git SHA the symbol index was built against, so callers can detect
snapshot drift vs authoritative_sha before bind.

Real SymbolDB, real build_index, real git subprocess, real
record_index_state — no MagicMock. Mirrors the
``codegenome_continuity_service._fresh_adapter`` pattern from CLAUDE.md.
"""

from __future__ import annotations

import subprocess
from pathlib import Path

import pytest

from code_locator.config import CodeLocatorConfig
from code_locator.indexing.index_builder import build_index
from code_locator.indexing.sqlite_store import SymbolDB
from code_locator.tools.validate_symbols import ValidateSymbolsTool
from code_locator_runtime import record_index_state


def _git(repo: Path, *args: str) -> str:
result = subprocess.run(
["git", *args],
cwd=repo,
capture_output=True,
text=True,
check=True,
)
return result.stdout.strip()


def _seed_repo(repo: Path) -> str:
    """Create a git repo at *repo* with one committed Python file.

    The repo is initialised on branch ``main`` with a single ``module.py``
    defining ``CheckoutController.process_order``.

    Args:
        repo: Directory to create (parents included) and initialise.

    Returns:
        The SHA of the seed commit (``git rev-parse HEAD``).
    """
    repo.mkdir(parents=True, exist_ok=True)
    (repo / "module.py").write_text(
        "class CheckoutController:\n"
        "    def process_order(self):\n"
        "        return None\n",
        # Explicit encoding so the fixture does not depend on the locale.
        encoding="utf-8",
    )
    _git(repo, "init", "-q", "-b", "main")
    _git(repo, "config", "user.email", "test@example.com")
    _git(repo, "config", "user.name", "Test User")
    # Keep the commit hermetic: a global commit.gpgsign=true would otherwise
    # make the seed commit try to sign (and fail or prompt) on dev machines.
    _git(repo, "config", "commit.gpgsign", "false")
    _git(repo, "add", ".")
    _git(repo, "commit", "-q", "-m", "seed")
    return _git(repo, "rev-parse", "HEAD")


def test_validate_symbols_returns_indexed_at_sha(tmp_path: Path) -> None:
    """The sha returned by validate_symbols must equal git HEAD after a
    full build + record_index_state cycle — the production code path
    code_locator_runtime.rebuild_index uses.
    """
    repo = tmp_path / "repo"
    db_path = tmp_path / "code-graph.db"
    head_sha = _seed_repo(repo)

    build_index(str(repo), str(db_path))
    record_index_state(str(db_path), str(repo))

    db = SymbolDB(str(db_path))
    try:
        config = CodeLocatorConfig(sqlite_db=str(db_path))
        tool = ValidateSymbolsTool(db, config)
        results = tool.execute({"candidates": ["CheckoutController"]})
    finally:
        # Always release the DB handle, even when the tool raises, so a
        # failing run does not leave the file open under tmp_path.
        db.close()

    assert results, "Expected at least one match for CheckoutController"
    for r in results:
        assert r.indexed_at_sha == head_sha, (
            f"indexed_at_sha={r.indexed_at_sha!r} did not match git HEAD={head_sha!r}"
        )
        # repo_path is recorded as the resolved path; compare against the
        # same resolution to avoid macOS /private/var vs /var aliasing.
        assert r.indexed_at_path == str(repo.resolve())


def test_validate_symbols_returns_empty_sha_when_meta_missing(
    tmp_path: Path,
) -> None:
    """When build_index runs without record_index_state (legacy index, or
    an in-progress first build), validate_symbols returns indexed_at_sha=""
    instead of raising. Caller-LLM treats empty as "snapshot unknown."
    """
    repo = tmp_path / "repo"
    db_path = tmp_path / "code-graph.db"
    _seed_repo(repo)

    build_index(str(repo), str(db_path))
    # Deliberately skip record_index_state — the index_meta table never
    # gets populated. read_index_meta should swallow the OperationalError
    # / missing-row path and return "".

    db = SymbolDB(str(db_path))
    try:
        config = CodeLocatorConfig(sqlite_db=str(db_path))
        tool = ValidateSymbolsTool(db, config)
        results = tool.execute({"candidates": ["CheckoutController"]})
    finally:
        # Always release the DB handle, even when the tool raises.
        db.close()

    assert results, "Expected at least one match"
    for r in results:
        assert r.indexed_at_sha == ""
        assert r.indexed_at_path == ""


def test_validate_symbols_sha_cached_at_init(tmp_path: Path) -> None:
    """The sha is read once at tool init and reused. A second git commit
    landing AFTER init does not change the cached value — matches the
    existing symbol-list caching contract ("index doesn't change mid-run").
    """
    repo = tmp_path / "repo"
    db_path = tmp_path / "code-graph.db"
    first_sha = _seed_repo(repo)

    build_index(str(repo), str(db_path))
    record_index_state(str(db_path), str(repo))

    db = SymbolDB(str(db_path))
    try:
        config = CodeLocatorConfig(sqlite_db=str(db_path))
        tool = ValidateSymbolsTool(db, config)

        # Add a second commit on top — the tool was initialized against first_sha.
        (repo / "module.py").write_text(
            "class CheckoutController:\n"
            "    def process_order(self):\n"
            "        return None\n"
            "    def new_method(self):\n"
            "        return None\n",
            encoding="utf-8",
        )
        _git(repo, "add", ".")
        _git(repo, "commit", "-q", "-m", "second")
        second_sha = _git(repo, "rev-parse", "HEAD")
        assert first_sha != second_sha

        results = tool.execute({"candidates": ["CheckoutController"]})
    finally:
        # Always release the DB handle, even when a step above raises.
        db.close()

    # Without this guard the loop below would pass vacuously on no matches.
    assert results, "Expected at least one match for CheckoutController"
    for r in results:
        # Cached at init — reflects the build-time sha, not the post-commit one.
        assert r.indexed_at_sha == first_sha
Loading