From e81604ca03e34e4ff70a63720136866c32946e82 Mon Sep 17 00:00:00 2001 From: jinhongkuan Date: Mon, 18 May 2026 17:03:07 -0700 Subject: [PATCH 1/8] docs(plans): R4 amendment to ledger locator plan + R4-bis PASS (#368) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit R4 retracts R3's `resolve_config_path()` (filesystem-topology inference for primary-worktree convergence) and replaces it with five amendments: 1. DELETE `resolve_config_path()` from the locator. Runtime readers in `context.py` revert to direct `/.bicameral/config.yaml` access; wizard uses `git show HEAD:.bicameral/config.yaml` for onboarding detection. No filesystem-topology inference — concept ports cleanly across all 9 deployment shapes catalogued in the Topology Problem Notion page (worktree, submodule, bare-repo, sparse checkout, devcontainer, Codespaces, CI, --separate-git-dir, non-git VCS). 2. CONFIG SPLIT — team-identity keys stay at `/.bicameral/config.yaml` (git-committed); per-operator keys move to `~/.bicameral/projects//operator.yaml` (per-machine). Routing table `context._CONFIG_KEY_ROUTING` is the single source of truth. 3. EXPLICIT VCS CONTRACT — structured `ProjectIdResolutionError` from `ledger_locator/_project_id.py::common_dir_for` when `git rev-parse` fails, with verbatim "bicameral currently supports git only". 4. REUSE `_resolve_authoritative_ref()` for divergence guard. 5. DEFER full ephemeral-environment support; R4 adds only a one-line notice in `migrate-state` post-flight summary. Backed by 9 ratified bicameral decisions + 14 explicitly-rejected alternatives (see bicameral ledger 2026-05-18 session). Audit chain: - R4 (2026-05-18) VETO — V1 `_edit_config_interactive` wrong function name; V2 stale context.py reader lines + bogus 412-429 entry; V3 missing tests for solo-mode short-circuit + `run_config_wizard` editor. Gate: .qor/gates/2026-05-18T2334-r4audit/audit.json (local). - R4-bis (2026-05-18) PASS after plan-text fixes. Gate: .qor/gates/2026-05-18T2338-r4bis/audit.json (local). META_LEDGER entries #51 (VETO) and #52 (PASS) recorded. Phase 1 implementation lands in the following commit. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/META_LEDGER.md | 43 +++ ...2026-05-16-ledger-locator-and-migration.md | 341 ++++++++++++++++++ 2 files changed, 384 insertions(+) create mode 100644 thoughts/shared/plans/2026-05-16-ledger-locator-and-migration.md diff --git a/docs/META_LEDGER.md b/docs/META_LEDGER.md index fad6eee5..4dafa900 100644 --- a/docs/META_LEDGER.md +++ b/docs/META_LEDGER.md @@ -2785,3 +2785,46 @@ Same pattern as prior entries (#28, #33, #36, #41, #43–#49): --- *Chain integrity: VALID (50 entries on this branch)* *Genesis: `29dfd085` → ... → v0-release-blockers SEAL: `7cc405fc` → #218 Phase 1 SEAL (#43) → #218 LLM-06 SEAL (#44, PR #251) → #227 SOC2-06+OWASP-06 SEAL (#45, PR #253) → #252 Layer 2 SEAL (#46, PR #256) → #252 Layer 3 SEAL (#47, PR #257) → #252 Layer 4 SEAL (#48) → team-server tier v1 RESEARCH (#49) → R1 limitation remediation RESEARCH (#50)* + +--- + +### Entry #51: GATE TRIBUNAL — Ledger Locator R4 (#368) + +**Timestamp**: 2026-05-18T23:34:00Z +**Phase**: audit (R4) +**Plan target**: `thoughts/shared/plans/2026-05-16-ledger-locator-and-migration.md` (revision 4) +**Auditor**: The Judge (solo mode; `codex-plugin` capability shortfall logged) +**Verdict**: **VETO** +**Risk grade**: L2 +**Findings categories**: `infrastructure-mismatch` (2), `coverage-gap` (1) +**Gate artifact**: `.qor/gates/2026-05-18T2334-r4audit/audit.json` +**Audit report**: `.agent/staging/AUDIT_REPORT.md` + +**Findings summary**: +- V1 (infrastructure-mismatch): Phase 2 cites `_edit_config_interactive` at `setup_wizard.py:1817` but actual function is `run_config_wizard`. +- V2 (infrastructure-mismatch): `context.py` reader line numbers stale by 1-7; `412-429` cited as reader is a `BicameralContext` dataclass field block; actual reader count is 10 not 9. +- V3 (coverage-gap): R4 test list omits (a) solo-mode short-circuit in `run_setup`, (b) `run_config_wizard` two-pane editor changes. + +**Required next action**: Governor amends plan text to fix V1, V2, V3; re-run `/qor-audit` for R4-bis. + +All three findings classify as **Plan-text** per `qor/references/doctrine-audit-report-language.md` — no `/qor-debug` invocation. + +--- + +### Entry #52: GATE TRIBUNAL — Ledger Locator R4-bis (#368) + +**Timestamp**: 2026-05-18T23:38:00Z +**Phase**: audit (R4-bis re-audit) +**Plan target**: `thoughts/shared/plans/2026-05-16-ledger-locator-and-migration.md` (revision 4-bis) +**Auditor**: The Judge (solo mode) +**Verdict**: **PASS** +**Risk grade**: L2 +**Cleared findings**: V1 (function name fixed), V2 (context.py reader lines + count corrected, 412-429 dropped), V3 (3 new functional tests added) +**Gate artifact**: `.qor/gates/2026-05-18T2338-r4bis/audit.json` +**Audit report**: `.agent/staging/AUDIT_REPORT.md` + +**Open advisories** (non-VETO, carry forward): +- Section 4 razor: `_write_collaboration_config` may grow past 40 lines under two-file split; pre-emptive helper split recommended. +- Documentation drift: `docs/architecture/ledger-locator.md` (declared in plan frontmatter) does not yet exist; hard-blocks at `/qor-substantiate`, not at `/qor-audit`. + +**Required next action**: Governor proceeds to `/qor-implement` against R4-bis PASS. diff --git a/thoughts/shared/plans/2026-05-16-ledger-locator-and-migration.md b/thoughts/shared/plans/2026-05-16-ledger-locator-and-migration.md new file mode 100644 index 00000000..309b0b94 --- /dev/null +++ b/thoughts/shared/plans/2026-05-16-ledger-locator-and-migration.md @@ -0,0 +1,341 @@ +# Plan: Ledger Locator + state migration to `~/.bicameral/projects//` (#368) + +**change_class**: feature + +**doc_tier**: system + +**terms_introduced**: +- term: ledger-locator + home: docs/architecture/ledger-locator.md +- term: project-id + home: docs/architecture/ledger-locator.md +- term: state-migration + home: docs/architecture/ledger-locator.md + +**boundaries**: +- limitations: + - Single-user, single-machine layout. No multi-user shared-runner support. + - `git` must be on `$PATH` (the locator keys off `git rev-parse --git-common-dir`). +- non_goals: + - Per-branch DBs (rejected in the #368 RFC; same reasoning). + - Daemon-based ownership of the ledger file — tracked separately in the sibling daemon plan (`2026-05-16-bicameral-daemon.md` for #372). + - Auto-rebuild of `code-graph.db` after migration. The existing `python -m code_locator index ` flow remains the (re)builder. +- exclusions: + - CI / Docker / shared-runner auto-detection (operators set env vars explicitly). + - `--ledger-location=repo` opt-out flag. + - Any change to what gets stored in either file. + +## Why this ships first + +The locator decides *where* both the ledger and the code-graph live. The daemon plan binds to whatever path the locator resolves. If the daemon ships first, it binds to the legacy `/.bicameral/` path and the locator change becomes a destructive layout migration on a running daemon — strictly harder. Sequencing locator → daemon means the daemon plan can assume the layout is already settled and existing users have already migrated. + +Target release: **v0.16.0** (locator + migration). The daemon plan targets **v0.16.0** as well, but cuts a separate PR against `dev` that lands after this one. + +## Audit history + +| Revision | Audit | Verdict | Gate artifact | +|---|---|---|---| +| 1 (original) | 2026-05-17 (solo) | **VETO** — V1 `specification-drift` (None default crashes `resolve_paths`), V2 `infrastructure-mismatch` (single-consumer model misses 15 read sites), V3 `infrastructure-mismatch` (cited `setup_wizard.py:366` actually `:472`) | `.qor/gates/2026-05-17T0605-a24ab8/audit.json` | +| 2 | 2026-05-17 (solo) | **PASS** — V1/V2/V3 cleared; advisories on `origin.txt` term + `gc` jargon left open | `.qor/gates/2026-05-17T0605-a24ab8/audit.json` (R2 PASS overwrote R1) | +| 3 | pending | — | (audit skipped — superseded by R4) | +| 4 | 2026-05-18 (solo) | **VETO** — V1 `infrastructure-mismatch` (`_edit_config_interactive` actual name `run_config_wizard`), V2 `infrastructure-mismatch` (context.py reader lines stale + bogus `412-429`), V3 `coverage-gap` (missing solo-short-circuit + `run_config_wizard` editor tests) | `.qor/gates/2026-05-18T2334-r4audit/audit.json` | +| 4-bis (this revision) | pending | — | — | + +**R1 V1 + V2 resolution**: Phase 2 now adopts V1(b) + V2(b) combined — `load_config()` substitutes the locator-resolved path on construction; `resolve_paths()` is None-safe as a belt-and-braces guard for direct-construction callers. Four new unit tests (`test_code_locator_config_none_safe.py`) pin the contract. + +**R1 V3 resolution**: `setup_wizard.py:366` → `setup_wizard.py:472`. + +**R3 scope expansion — worktree completeness**: R2's PASS satisfied the literal #368 acceptance criterion (ledger + code-graph share across worktrees), but missed three file classes that #368's value proposition (*"Multiple working trees of one project share... same decisions, same symbol graph"*) actually requires: +- `.bicameral/local/bm25_index.pkl` — derived from `code-graph.db`, must travel with it. One consumer at `code_locator_runtime.py:277`. +- `.bicameral/local/watermark` — `events/materializer.py:24` reads `local_dir/"watermark"`. Per-worktree today → peers' events re-replayed N times → duplicate `input_span` rows under multi-worktree usage. +- `.bicameral/pending-transcripts/` + `processed-transcripts/` — `events/transcript_queue.py:18-27` declares the layout under `/.bicameral/`. Per-worktree today → transcript ingested in worktree A is invisible to worktree B's drain loop. +- `.bicameral/config.yaml` — committed to git, but worktrees on different branches diverge silently. **R4 supersedes R3's read-fix here** (see R4 expansion below). R3 had proposed `resolve_config_path()` to read from primary worktree; R4 deletes that approach. + +These are added to Phase 2 (locator API + consumer wiring) and Phase 3 (migrate-state file inventory). + +**R4 expansion — config split + git-native worktree handling**: R3's `resolve_config_path()` was a filesystem-topology inference ("look in primary worktree's directory") that special-cased `git worktree add` and silently misclassified submodules, bare-repo deployments, sparse checkouts, dev containers, Codespaces, and ephemeral CI runners. Captured in detail at the [Topology Problem Notion page](https://www.notion.so/3642a51619c481149836ef883ae62489). The R3 choice was deliberate per its own text, but the audit was pending and the choice was challenged this session. The R4 amendments: + +1. **DELETE `resolve_config_path()` from the locator module** (decision:ew9rgegdlblexsraesss — ratified 2026-05-18). Runtime readers in `context.py` revert to direct `Path(repo_path) / ".bicameral" / "config.yaml"` access (the v0.15.x baseline). Wizard onboarding detection uses `git show HEAD:.bicameral/config.yaml`. Divergence guard uses `git show :.bicameral/config.yaml`. No filesystem-topology inference anywhere — concept ports cleanly across all 9 deployment shapes the Topology Problem catalogs. +2. **Config split** (decision:5nr66wvmapjpt58rrji8): team-identity keys (`mode`, `team.backend`, `team.folder_id`/`remote_root`, `ingest_max_bytes`, optional `signer_email_fallback_min_strength`) stay at `/.bicameral/config.yaml` (git-committed). Per-operator keys (`telemetry`, `channel`, `guided`, `signer_email_fallback`, `render_source_attribution`, `team.author`, `team.role`, rate-limit knobs, query timeouts) move to `~/.bicameral/projects//operator.yaml` (per-machine). `setup_wizard._write_collaboration_config` splits into two atomic writes (temp-file + rename); `context.py` readers route per-key. +3. **Explicit VCS contract** (decision:6c20xahdyxk3suzav4pj): surface a structured `ProjectIdResolutionError` from `ledger_locator/_project_id.py::common_dir_for` when `git rev-parse --git-common-dir` fails. Error message names the assumption: `"bicameral currently supports git only; non-git VCSes are not yet implemented"`. Forces future port to jj/sapling to be a deliberate amendment. +4. **Reuse `_resolve_authoritative_ref()`** (decision:ogdfx014sqgc6fi6ky1a): wizard's default-branch divergence guard delegates to `setup_wizard.py:325::_resolve_authoritative_ref()`. Do not reinvent the `origin/HEAD → user-prompt` fallback ladder. +5. **Defer ephemeral environments** (decision:e3xz4c4ji4x7lm3lvq4k): R4 only adds a one-line notice in `migrate-state` CLI post-flight summary naming the issue. Full ephemeral-environment support (BICAMERAL_STATE_ROOT, --ephemeral=no-op, rebuild-from-team CLI) is a subsequent patch (v0.16.1 / v0.17). + +**R4 specification-drift retraction**: R3 line 127 said "the asymmetry [between worktree-local writes and primary-worktree reads] is deliberate." That sentence is **retracted** by R4 — there is no asymmetry under R4 because there is no primary-worktree resolution. The "deliberate" framing was a one-line shortcut described as a tradeoff; R4 picks a layer where the question doesn't arise. + +## Open questions + +1. **`bicameral-mcp update` migration trigger.** v0.16 changes the canonical state layout. Anyone running `bicameral-mcp update` from v0.15.x must have their existing `/.bicameral/ledger.db` and `/.bicameral/local/code-graph.db` moved before the new binary tries to open them at the new path. **Resolution**: the `update` flow (an existing skill — see `.claude/skills/bicameral-update/`) runs `bicameral-mcp migrate-state --auto` after package upgrade and before the next MCP boot. `--auto` is non-interactive, idempotent, and archives-on-collision (defined below). Operators who upgrade outside the skill see a one-time first-boot warning surfacing the same command. + +2. **What happens to legacy `/.bicameral/` after migration.** The `ledger.db`, `local/code-graph.db{,-shm,-wal}`, derived state (bm25, watermark, transcript queues), and per-operator config (R4 adds `operator.yaml` to the home-side) all move out; the rest of `/.bicameral/` (the trimmed `config.yaml` with only team-identity keys, events/, hooks/) stays where it is — it's repo-scoped state, not user-local cache. The migration deletes only the moved files, leaves `/.bicameral/local/` if empty, removes it if it's empty. + +3. **Project-id collision.** `sha256(git rev-parse --git-common-dir)[:16]` is collision-resistant in practice but not by guarantee. **Resolution**: on first `resolve_ledger_url()` for a project, write `~/.bicameral/projects//origin.txt` containing the absolute git-common-dir path. On subsequent resolves, read that file and refuse to proceed if it disagrees (configurable via `BICAMERAL_LOCATOR_ALLOW_COLLISION=1` — surfaces the offending path in the error). + +4. **Memory-mode (`SURREAL_URL=memory://`) interaction.** Memory mode skips the locator entirely for the ledger URL (env-var wins). For the code-graph file there is no memory equivalent; tests must point `CODE_LOCATOR_SQLITE_DB` at `tmp_path` explicitly. The locator never silently writes test artifacts into `~/.bicameral/projects/` — tests that omit both env vars get a temp-dir locator fixture from `conftest.py`. + +5. **(R4 resolved 2026-05-18)** ~~How does the wizard detect that team mode is already configured when a teammate clones the repo?~~ → `git show HEAD:.bicameral/config.yaml`. Works uniformly across all 9 deployment shapes (worktree, submodule, bare-repo, sparse checkout, devcontainer, Codespaces, CI runner, `--separate-git-dir`, non-git VCS error). See decision:ew9rgegdlblexsraesss. + +6. **(R4 resolved 2026-05-18)** ~~Should the git-only VCS assumption be implicit (accidental success on misclassified VCSes) or explicit?~~ → Explicit. Structured error from `ledger_locator/_project_id.py::common_dir_for`. See decision:6c20xahdyxk3suzav4pj. + +7. **(R4 resolved 2026-05-18)** ~~How does the wizard pick the default branch for the divergence guard?~~ → Reuse `_resolve_authoritative_ref()` at `setup_wizard.py:325`. See decision:ogdfx014sqgc6fi6ky1a. + +8. **(R4 deferred 2026-05-18)** Codespaces / devcontainer / CI persistence: `~/.bicameral/projects//` evaporates at container teardown. **R4 placeholder**: one-line notice in `migrate-state` post-flight. **Full scope** (BICAMERAL_STATE_ROOT, --ephemeral=no-op, rebuild-from-team CLI, ephemeral-signal detection) is a subsequent patch (v0.16.1 / v0.17). See decision:e3xz4c4ji4x7lm3lvq4k. + +## Phase 1: `ledger_locator/` module + +The deterministic resolver. No I/O until you call a `resolve_*` function. No network, no LLM, no git mutations. + +### Affected files + +- `ledger_locator/__init__.py` (new) — public API: `resolve_ledger_url()`, `resolve_code_graph_path()`, `project_id_for(repo_path)`, `project_dir_for(repo_path)`. +- `ledger_locator/_project_id.py` (new) — wrap `git rev-parse --git-common-dir`, hash, return 16-char hex. +- `ledger_locator/_origin_guard.py` (new) — read/write/verify `/origin.txt`. + +### Unit tests (write first) + +- `tests/test_ledger_locator.py::test_default_resolves_under_home_bicameral_projects` — in a real git repo (`tmp_path` + `git init`), `resolve_ledger_url()` returns `surrealkv:///.bicameral/projects/<16hex>/ledger.db`; `resolve_code_graph_path()` returns the sibling `code-graph.db`. +- `tests/test_ledger_locator.py::test_resolves_derived_state_paths_under_project_dir` — same fixture; assert `resolve_bm25_index_path()`, `resolve_watermark_path()`, `resolve_pending_transcripts_dir()`, `resolve_processed_transcripts_dir()` all share the same `<16hex>` parent dir as `resolve_code_graph_path()` (one project, one bag of state). +- `tests/test_ledger_locator.py::test_resolve_operator_config_path` (**R4 replacement** for the dropped `test_resolve_config_path_uses_primary_worktree`) — assert `resolve_operator_config_path()` returns `project_dir_for(repo) / "operator.yaml"` (per-operator config under the project state dir). No worktree-divergence test — config.yaml is now read as a normal tracked file via `/.bicameral/config.yaml`; per-worktree divergence is expected/handled by git, not the locator. +- `tests/test_ledger_locator.py::test_env_override_wins_for_ledger_only` — `SURREAL_URL=memory://`; `resolve_ledger_url()` returns `memory://` and `resolve_code_graph_path()` still returns the on-disk default. Each env var is independent. +- `tests/test_ledger_locator.py::test_env_override_wins_for_code_graph_only` — `CODE_LOCATOR_SQLITE_DB=/tmp/x.db`; `resolve_code_graph_path()` returns `/tmp/x.db`; `resolve_ledger_url()` returns the default. +- `tests/test_ledger_locator.py::test_two_worktrees_resolve_to_same_id` — create a primary git repo + `git worktree add` a second working tree; assert `project_id_for(primary) == project_id_for(secondary)`. +- `tests/test_ledger_locator.py::test_separate_clones_resolve_to_different_ids` — two `git init` repos in disjoint dirs produce different project IDs (the inputs to the hash differ). +- `tests/test_ledger_locator.py::test_non_git_directory_raises_actionable_error` — `resolve_ledger_url()` from a non-git directory raises `ProjectIdResolutionError` whose message names the missing `.git/` and points at the env-var override. +- `tests/test_ledger_locator_origin_guard.py::test_first_resolve_writes_origin_txt` — `resolve_ledger_url()` from a fresh project; assert `/origin.txt` exists and contains the absolute common-dir path. +- `tests/test_ledger_locator_origin_guard.py::test_collision_with_different_origin_raises` — manually write a project dir's `origin.txt` pointing at `/somewhere/else`; `resolve_ledger_url()` raises `ProjectIdCollisionError` whose message includes both the on-disk origin and the current one. +- `tests/test_ledger_locator_origin_guard.py::test_collision_override_logs_and_proceeds` — same setup, `BICAMERAL_LOCATOR_ALLOW_COLLISION=1`; the call succeeds and emits a single WARN with both paths in the message. + +### Implementation files + +- `ledger_locator/_project_id.py`: + - `def common_dir_for(repo_path: Path) -> Path`: `subprocess.run(["git", "rev-parse", "--git-common-dir"], cwd=repo_path)`; raises `ProjectIdResolutionError` on non-zero exit. **R4 — explicit VCS contract** (decision:6c20xahdyxk3suzav4pj): error message names the assumption verbatim: `"bicameral currently supports git only; non-git VCSes are not yet implemented. To use bicameral with this repo, run from inside a git working tree."` Force future ports to jj/sapling/fossil to be a deliberate locator amendment rather than an accidental success on a misclassified VCS. + - `def project_id_for(repo_path: Path) -> str`: `hashlib.sha256(str(common_dir_for(repo_path).resolve()).encode()).hexdigest()[:16]`. + +- `ledger_locator/_origin_guard.py`: + - `def assert_origin(project_dir: Path, common_dir: Path) -> None`: on first call, writes `origin.txt`. On subsequent calls, compares and raises `ProjectIdCollisionError` unless `BICAMERAL_LOCATOR_ALLOW_COLLISION=1`. + +- `ledger_locator/__init__.py`: + - `STATE_ROOT = Path.home() / ".bicameral" / "projects"`. + - `def project_dir_for(repo_path: Path | None = None) -> Path`: defaults `repo_path` to `Path.cwd()`; calls the helpers above. + - `def resolve_ledger_url(repo_path: Path | None = None) -> str`: env `SURREAL_URL` wins; else `f"surrealkv://{project_dir_for(repo_path) / 'ledger.db'}"`. + - `def resolve_code_graph_path(repo_path: Path | None = None) -> Path`: env `CODE_LOCATOR_SQLITE_DB` wins; else `project_dir_for(repo_path) / "code-graph.db"`. + - **R3 additions** (worktree-shared derived state, project-scoped): + - `def resolve_bm25_index_path(repo_path: Path | None = None) -> Path`: `project_dir_for(repo_path) / "bm25_index.pkl"`. Sibling to the code-graph SQLite — they are a unit. + - `def resolve_watermark_path(repo_path: Path | None = None) -> Path`: `project_dir_for(repo_path) / "watermark"`. Replaces `events/materializer.py`'s `local_dir / "watermark"`. + - `def resolve_pending_transcripts_dir(repo_path: Path | None = None) -> Path`: `project_dir_for(repo_path) / "pending-transcripts"`. Replaces `events/transcript_queue.py:_pending_root`. + - `def resolve_processed_transcripts_dir(repo_path: Path | None = None) -> Path`: `project_dir_for(repo_path) / "processed-transcripts"`. Sibling. + - **R4 addition** (per-operator config, project-scoped, per-machine): + - `def resolve_operator_config_path(repo_path: Path | None = None) -> Path`: `project_dir_for(repo_path) / "operator.yaml"`. Per-operator settings live under the project state dir; shared across worktrees on the same machine; not committed to git. **Decision**: decision:5nr66wvmapjpt58rrji8. + - **R4 retraction** (was R3 addition): `resolve_config_path()` is **NOT** added. R3 had proposed `Path(common_dir_for(repo_path)).parent / ".bicameral" / "config.yaml"` for primary-worktree convergence; R4 deletes this approach entirely. `/.bicameral/config.yaml` is read as a normal tracked file by callers via `Path(repo_path) / ".bicameral" / "config.yaml"` (no locator function). Per the [Topology Problem](https://www.notion.so/3642a51619c481149836ef883ae62489) analysis — filesystem-topology inference special-cases `git worktree add` and silently misclassifies 8 other deployment shapes (submodules, bare-repo, Codespaces, etc.). Decision: decision:ew9rgegdlblexsraesss; superseded R3 design at decision:6z39wrjpmmg9vhm8i6t4 (rejected). + +## Phase 2: Delegate every call site to the locator + +Today the path is computed inline in four places. After this phase, the locator is the only source. + +### Affected files + +- `ledger/adapter.py:158-161` (`_default_db_url`) — delegate to `ledger_locator.resolve_ledger_url()`. Remove the local `Path.home() / ".bicameral" / "ledger.db"` literal. +- `code_locator_runtime.py:48` — `os.environ.setdefault("CODE_LOCATOR_SQLITE_DB", str(ledger_locator.resolve_code_graph_path()))`. The runtime stops computing a path itself. +- `code_locator_runtime.py:277` (`bm25_path = Path(config.sqlite_db).parent / "bm25_index.pkl"`) — delegate to `ledger_locator.resolve_bm25_index_path()`. Removes the implicit "bm25 lives next to sqlite_db" coupling; both paths come from the locator independently. +- `code_locator/config.py:17` — drop the `~/.bicameral/code-graph.db` literal default; the field type becomes `str | None` with default `None`. **`load_config()` (line 70) is responsible for substituting the locator-resolved path before construction returns**; see Implementation files below. This means every existing reader of `config.sqlite_db` (enumerated below) sees a resolved path without needing to change. +- `code_locator/config.py:36-39` (`resolve_paths`) — None-safe wrap. When `sqlite_db is None`, fall through to `ledger_locator.resolve_code_graph_path()` before `Path(...).expanduser()`. Belt-and-braces against any caller that builds `CodeLocatorConfig(...)` directly instead of through `load_config()`. +- `events/materializer.py:24` (`self._watermark_path = local_dir / "watermark"`) — replace with `self._watermark_path = ledger_locator.resolve_watermark_path()`. Drop the `local_dir` parameter from `EventMaterializer.__init__`; callers stop computing watermark paths. +- `events/transcript_queue.py:18-27` (`PENDING_DIR`, `_pending_root`, `_processed_root`) — replace `Path(repo_path) / ".bicameral" / PENDING_DIR` with `ledger_locator.resolve_pending_transcripts_dir(repo_path)`. Same for processed. +- `scripts/hooks/transcript_archive.py` (SessionEnd hook, runs out-of-process) — replace the `/.bicameral/pending-transcripts/` literal with `ledger_locator.resolve_pending_transcripts_dir(Path.cwd())`. The hook is already installed as a console script (`pyproject.toml:80`), so importing `ledger_locator` is safe. +- `context.py` — **10 readers** that hardcode `Path(repo_path) / ".bicameral" / "config.yaml"` (R4 V2 correction: R3 cited "9 readers" with stale line numbers and a bogus `412-429` claim — that range is a `BicameralContext` dataclass field block, not a reader). Actual readers at current `dev` HEAD: `_read_yaml_string_field:66`, `_read_signer_email_fallback:86`, `_read_render_source_attribution:99`, `_read_ingest_max_bytes:164`, `_read_ingest_rate_limit_burst:189`, `_read_ingest_rate_limit_refill_per_sec:213`, `_read_query_timeout_seconds:252` (generic helper), `_read_query_timeout_read_seconds:295` (wraps generic), `_read_query_timeout_drift_seconds:307` (wraps generic), `_read_guided_mode:319`. **R4 supersedes R3 here**: instead of delegating to `ledger_locator.resolve_config_path()` (R3 design), each reader is routed per-key based on R4 config split (decision:5nr66wvmapjpt58rrji8). Team-identity keys stay direct on `/.bicameral/config.yaml`. Per-operator keys (`signer_email_fallback`, `render_source_attribution`, `guided`, rate-limit knobs, query timeouts) read from `ledger_locator.resolve_operator_config_path(repo_path)`. Per-key routing table is the single source of truth — encoded in a new `context._CONFIG_KEY_ROUTING` constant (key → enum {`team`, `operator`}). Wizard, migrate-state, and context.py all consume the same table. +- `setup_wizard.py:472` — stop writing `SURREAL_URL` for the default case (locator decides). *(Audit V3: corrected from earlier `setup_wizard.py:366` which was lifted from the #368 issue body and predated the current file.)* +- `setup_wizard.py:473` — stop writing `CODE_LOCATOR_SQLITE_DB` for the default case (locator decides). +- `setup_wizard.py:1528` (`_write_collaboration_config`) — **R4 split**: function now writes BOTH `/.bicameral/config.yaml` (team-identity keys via atomic temp-file + rename) AND `~/.bicameral/projects//operator.yaml` (per-operator keys via atomic temp-file + rename). If either temp-file write fails, both temps are unlinked and the function raises (no torn state). The two files are written in a defined order: operator.yaml first (no git impact), config.yaml second (commit-able). Decision: decision:5nr66wvmapjpt58rrji8. +- `setup_wizard.py:1697` (existing `_detect_linked_worktree` warning) — **R4 trailer**: in addition to the existing hooks-per-worktree note, append a one-line summary after every successful wizard run: `"team config → .bicameral/config.yaml (commit this) | your settings → ~/.bicameral/projects//operator.yaml (private)"`. Cheap fix for the silent-leak-to-git class of bugs where operators forget what's committed. +- `setup_wizard.run_setup` — **R4 onboarding detection**: before prompting for `_select_collaboration_mode`, call `subprocess.run(["git", "show", "HEAD:.bicameral/config.yaml"], ...)`. If returncode 0 and `mode == "team"`, print `"Detected team config: backend=<...>, folder=<...> ✓ (auto-joining)"` and skip the team-backend wizard. Decision: decision:ew9rgegdlblexsraesss. +- `setup_wizard.run_setup` — **R4 divergence guard**: when `git show HEAD:.bicameral/config.yaml` returns no config but `git show :.bicameral/config.yaml` (via `_resolve_authoritative_ref()`) does, warn: `"Your branch doesn't have .bicameral/config.yaml, but does. Merge first to inherit team config, or continue with fresh setup? [y/N]"`. Decision: decision:ogdfx014sqgc6fi6ky1a — reuses existing `_resolve_authoritative_ref()` ladder. +- `setup_wizard.py:1817` (`run_config_wizard`) — **R4 two-pane editor** (R4 V1 correction: R4-initial misnamed this `_edit_config_interactive`; actual function name at the cited line is `run_config_wizard`). The existing single-pane editor splits into a tagged single-pane (each row carries `[team]` or `[your machine]` prefix) so operators can SEE which choices affect teammates. Editor reads from both files via the per-key routing table; writes back to the appropriate file with the same atomic temp+rename discipline as `_write_collaboration_config`. + +**`config.sqlite_db` reader inventory** (grep-verified against current `dev` HEAD; included so the auditor doesn't need to re-walk): + +| File | Line | Read | +|---|---|---| +| `adapters/code_locator.py` | 128 | `SymbolDB(config.sqlite_db)` | +| `handlers/link_commit.py` | 567 | `_get_meta(cg_config.sqlite_db, "head_commit")` | +| `code_locator_runtime.py` | 223, 228, 229, 236, 237, 239, 241, 260, 261, 265, 275, 276, 277 | 13 reads, indexing + meta + symbol-count paths | + +None of these read sites need to change: they all obtain `config` via `load_config()` (the existing factory pattern), and `load_config()` now substitutes the locator-resolved path before returning. The `resolve_paths` None-safety in `code_locator/config.py:36-39` covers any future direct-construction caller. + +### Unit tests (write first) + +- `tests/test_ledger_adapter_uses_locator.py::test_default_url_comes_from_locator` — no env vars set; `SurrealDBLedgerAdapter()._url` equals `ledger_locator.resolve_ledger_url()`. (Asserts on the resolver's output rather than a hard-coded path so the test survives layout changes.) +- `tests/test_code_locator_runtime_uses_locator.py::test_runtime_sets_env_from_locator` — call the runtime's init helper in a `tmp_path` git repo with `CODE_LOCATOR_SQLITE_DB` unset; assert `os.environ["CODE_LOCATOR_SQLITE_DB"]` equals `str(ledger_locator.resolve_code_graph_path(...))` for that repo. +- `tests/test_code_locator_runtime_uses_locator.py::test_runtime_respects_existing_env` — `CODE_LOCATOR_SQLITE_DB=/tmp/x.db` pre-set; runtime init leaves it untouched (the locator's own env-priority logic already preserves it). +- `tests/test_setup_wizard_omits_state_env_vars.py::test_build_config_no_surreal_url` — `_build_config(...)` returned dict's `env` does NOT contain `SURREAL_URL`. +- `tests/test_setup_wizard_omits_state_env_vars.py::test_build_config_no_code_locator_sqlite_db` — same dict does NOT contain `CODE_LOCATOR_SQLITE_DB`. +- `tests/test_code_locator_config_none_safe.py::test_load_config_substitutes_locator_path_when_unset` — clear `CODE_LOCATOR_SQLITE_DB` from env; call `load_config()` in a `tmp_path` git repo; assert `config.sqlite_db == str(ledger_locator.resolve_code_graph_path())`. Catches V2-class regressions where a future change removes the `load_config`-level substitution. +- `tests/test_code_locator_config_none_safe.py::test_resolve_paths_handles_none_sqlite_db` — construct `CodeLocatorConfig(sqlite_db=None)` directly (bypass `load_config()`); call `.resolve_paths()`; assert `config.sqlite_db` is a non-None string ending in `code-graph.db`. Catches V1-class regressions where a future caller direct-constructs the dataclass and the None-safe wrap is missing. +- `tests/test_code_locator_config_none_safe.py::test_env_var_still_wins_over_locator` — set `CODE_LOCATOR_SQLITE_DB=/tmp/explicit.db`; call `load_config()`; assert `config.sqlite_db == "/tmp/explicit.db"`. Catches the regression where the locator substitution starts overriding an explicit env override. +- `tests/test_code_locator_config_none_safe.py::test_load_config_resolves_before_consumers_run` — in a `tmp_path` git repo with `CODE_LOCATOR_SQLITE_DB` unset, call `load_config()` and instantiate a `SymbolDB(config.sqlite_db)` against the returned path; assert the underlying SQLite file is created at the locator-resolved location (not at the literal string `"None"`). Direct check that the order-of-init concern V2 raised is resolved. +- `tests/test_two_worktrees_share_state.py::test_decision_visible_across_worktrees` — `git init` repo + `git worktree add`; ingest a decision from worktree A; instantiate a fresh adapter against worktree B; `get_all_decisions()` returns the row. Same physical `ledger.db`. Gated by `pytest.mark.requires_git` (the runner needs a real `git` binary). +- `tests/test_two_worktrees_share_state.py::test_watermark_shared_across_worktrees` — `git init` + `git worktree add`; instantiate `EventMaterializer` against worktree A; replay peer events; instantiate a second `EventMaterializer` from worktree B; assert `_read_offsets()` returns the offsets written by worktree A's replay (proves both worktrees read the same physical `watermark` file under the project dir). Without this fix, B would return `{}` and re-replay everything. +- `tests/test_two_worktrees_share_state.py::test_pending_transcript_visible_across_worktrees` — `git init` + `git worktree add`; `write_pending(worktree_a, session_id="s1", transcript_path=...)`; assert `list_pending_fifo(worktree_b)` returns the file (single project-scoped queue, not per-worktree). +- `tests/test_two_worktrees_share_state.py::test_bm25_index_shared_across_worktrees` — `git init` + `git worktree add`; assert `ledger_locator.resolve_bm25_index_path(worktree_a) == ledger_locator.resolve_bm25_index_path(worktree_b)`. (Functional check; we don't need to actually build a BM25 index for the test — the path-equality contract is what mattered.) +- **R4 drops** `tests/test_config_path_resolution.py` from R3 (worktree-convergence at the locator layer is no longer the design). **R4 adds** the following Phase 2 tests instead: +- `tests/test_config_split.py::test_team_identity_keys_persist_to_config_yaml` — call `_write_collaboration_config(data_path, mode="team", team_backend={...}, telemetry=False)`; assert `/.bicameral/config.yaml` contains `mode`, `team.backend`, `team.folder_id` but NOT `telemetry`, `channel`, `guided`. (Per-operator keys absent from the committed file.) +- `tests/test_config_split.py::test_operator_keys_persist_to_operator_yaml` — same call; assert `~/.bicameral/projects//operator.yaml` contains `telemetry`, `channel`, `guided`, `signer_email_fallback`, `render_source_attribution`, but NOT `mode` or `team.*`. +- `tests/test_config_split.py::test_atomic_two_file_write_failure_unlinks_both_temps` — monkeypatch `Path.replace` to raise on the second file; call `_write_collaboration_config`; assert neither destination file exists, no `*.tmp` artifacts remain in either directory, and the function raised. +- `tests/test_config_split.py::test_routing_table_covers_every_key` — assert every key produced by `_write_collaboration_config` is present in `context._CONFIG_KEY_ROUTING` with a non-None value of `team` or `operator`. Catches drift where a new key is added to the writer but not the routing table. +- `tests/test_config_split.py::test_context_reads_route_per_key` — write `mode: team` to a fake config.yaml and `guided: true` to a fake operator.yaml; assert `_read_guided_mode(repo)` returns True (read from operator.yaml) and a representative team-mode reader returns `"team"` (read from config.yaml). +- `tests/test_setup_wizard_git_native.py::test_onboarding_skips_team_prompts_when_head_has_team_config` — `git init` repo + commit `.bicameral/config.yaml` with `mode: team` and team-backend block. `monkeypatch` the wizard's questionary prompts to raise (would block on prompt). Call `run_setup(repo_path)`; assert it succeeds without raising and the team-prompt path is skipped (proves `git show HEAD:` detection short-circuits the wizard). +- `tests/test_setup_wizard_git_native.py::test_onboarding_skips_solo_prompts_when_head_has_solo_config` (**R4 V3 addition**) — symmetric to the team test: `git init` repo + commit `.bicameral/config.yaml` with `mode: solo`. Monkeypatch `_select_collaboration_mode` to raise. Call `run_setup(repo_path)`; assert it succeeds and `_select_collaboration_mode` was not invoked (proves solo-mode short-circuit branch fires). +- `tests/test_setup_wizard_git_native.py::test_divergence_guard_warns_on_branch_without_config` — `git init` repo with `main` having committed `.bicameral/config.yaml`; `git checkout -b feature-x` and remove `.bicameral/config.yaml`. From the feature branch, call the wizard's divergence guard helper; assert the warning message names the default branch (`main`) and offers the merge-first option. Decision: decision:ogdfx014sqgc6fi6ky1a. +- `tests/test_run_config_wizard.py::test_editor_reads_from_both_files` (**R4 V3 addition**) — pre-populate `/.bicameral/config.yaml` with `mode: team` and `~/.bicameral/projects//operator.yaml` with `guided: true, channel: stable`. Monkeypatch `questionary` to capture the prompt scaffolding instead of prompting. Invoke `run_config_wizard()` and assert the captured scaffolding includes BOTH `mode` (team-side) and `guided` (operator-side) as editable rows, each tagged with the correct `[team]` / `[your machine]` prefix. +- `tests/test_run_config_wizard.py::test_editor_writes_to_routed_file` (**R4 V3 addition**) — same fixture; monkeypatch `questionary` to return simulated edits: change `mode: team` → `solo` AND `guided: true` → `false`. Invoke `run_config_wizard()`; assert `/.bicameral/config.yaml` now contains `mode: solo` (and is otherwise unchanged) AND `~/.bicameral/projects//operator.yaml` now contains `guided: false` (and is otherwise unchanged). Catches the regression where the editor writes operator-side edits to config.yaml (or vice versa). +- `tests/test_ledger_locator_vcs_contract.py::test_non_git_directory_raises_with_vcs_message` — `resolve_ledger_url()` from a non-git tmp_path; assert the raised `ProjectIdResolutionError`'s message contains the verbatim string `"bicameral currently supports git only; non-git VCSes are not yet implemented"`. Decision: decision:6c20xahdyxk3suzav4pj. + +### Implementation files + +- `ledger/adapter.py`: + - `def _default_db_url() -> str: return ledger_locator.resolve_ledger_url()`. The helper stays as a single line so future tests / mocks can monkey-patch it. +- `code_locator_runtime.py`: + - The init function calls `os.environ.setdefault("CODE_LOCATOR_SQLITE_DB", str(ledger_locator.resolve_code_graph_path()))` instead of computing the path inline. +- `code_locator/config.py`: + - Change field declaration `sqlite_db: str = "~/.bicameral/code-graph.db"` to `sqlite_db: str | None = None`. Remove the hard-coded path literal. + - Replace `resolve_paths()` body with the None-safe form: + ```python + def resolve_paths(self) -> CodeLocatorConfig: + if self.sqlite_db is None: + from ledger_locator import resolve_code_graph_path + self.sqlite_db = str(resolve_code_graph_path()) + else: + self.sqlite_db = str(Path(self.sqlite_db).expanduser()) + return self + ``` + - `load_config()` is unchanged in body — it already terminates with `.resolve_paths()`. The None default + None-safe `resolve_paths` is what closes the V1/V2 gap: every consumer that goes through `load_config()` sees a resolved string; every consumer that direct-constructs the dataclass and calls `.resolve_paths()` also sees one. + - The pre-existing `CODE_LOCATOR_SQLITE_DB` env-var override path in `load_config()` (lines 56-68) is unchanged — when set, it lands in `config_data["sqlite_db"]` as a string and bypasses the locator fallback. Tests that pre-set the env var continue to work. +- `setup_wizard.py::_build_config`: + - Delete the two lines that write `SURREAL_URL` (now at line 472, was misquoted as 366 in the pre-audit draft) and `CODE_LOCATOR_SQLITE_DB` (line 473). No replacement — the locator picks both up at runtime from the agent process's env (which is empty for both vars by default). + +- `setup_wizard.py::_write_collaboration_config` (R4 split — decision:5nr66wvmapjpt58rrji8): + - Function signature unchanged. Body splits the input into team-identity and per-operator buckets via the `context._CONFIG_KEY_ROUTING` table. + - Two-file atomic write pattern: write each file to `.tmp` first, then `os.replace(.tmp, )` for atomic rename. If the second `replace` fails, the first file's temp gets unlinked (cleanup) — but the first file has already been successfully renamed, so the writer attempts to restore the previous content via backup. Simpler alternative: always write operator.yaml first (no git side-effect on failure), then config.yaml; if config.yaml fails, operator.yaml is left in its new state but the function raises. Operator can rerun. Document the failure mode in the function docstring. + - The function prints both file paths on success: `"team config → {config_path} (commit this)"` and `"your settings → {operator_path} (private)"`. + +- `context.py`: + - New module-level constant `_CONFIG_KEY_ROUTING: dict[str, Literal["team", "operator"]]`. Lists every key the wizard writes, partitioning team-identity vs per-operator. Single source of truth for migrate-state, wizard, and readers. + - Each of the 9 existing reader helpers (`_read_signer_email_fallback`, `_read_render_source_attribution`, `_read_ingest_max_bytes`, `_read_ingest_rate_limit_burst`, `_read_ingest_rate_limit_refill_per_sec`, `_read_query_timeout_*`, `_read_guided_mode`, generic `_read_yaml_string_field`) consults `_CONFIG_KEY_ROUTING[key]` to pick the file: `Path(repo_path) / ".bicameral" / "config.yaml"` for team-identity keys, `ledger_locator.resolve_operator_config_path(repo_path)` for per-operator keys. + - Existing fallback semantics preserved per reader (missing file → default; malformed YAML → line-oriented fallback). + +- `setup_wizard.py::run_setup` (R4 git-native onboarding detection — decision:ew9rgegdlblexsraesss): + - Before calling `_select_collaboration_mode()`, run `git show HEAD:.bicameral/config.yaml` via `subprocess.run([...], cwd=repo_path, capture_output=True, text=True)`. + - On returncode 0 + parseable YAML + `mode == "team"`: print `"Detected team config: backend={team.backend}, folder={team.folder_id or team.remote_root} ✓ (auto-joining)"` and skip the entire `_select_team_backend` flow. `team_backend = parsed["team"]` is reused verbatim. + - On returncode 0 + `mode == "solo"`: similar short-circuit — `collab_mode = "solo"`; skip prompt. + - Otherwise (returncode != 0 / unparseable / no mode): fall through to existing `_select_collaboration_mode()` prompt. + - Divergence guard (decision:ogdfx014sqgc6fi6ky1a): when HEAD lacks the file, also run `git show {default_branch}:.bicameral/config.yaml` where `default_branch = _resolve_authoritative_ref(repo_path)[0]`. If that returncode is 0, prompt: `"Your branch doesn't have .bicameral/config.yaml, but {default_branch} does. Merge first to inherit team config, or continue with fresh setup? [y/N]"`. On `n`/empty (default `N`), exit non-zero with the merge-first hint. + +## Phase 3: `migrate-state` CLI + auto-trigger on `bicameral update` + +The bytes have to move before the new binary tries to open them. `migrate-state` is a one-shot, idempotent, manifest-resumable CLI that walks the current repo and relocates *every project-scoped state file* from `/.bicameral/...` into the locator-resolved project dir. Archives on collision; cleans up empty source directories after success. + +**Source inventory** (per R3 — the full project-scoped state, not just ledger + code-graph): + +| Legacy path (under `/.bicameral/`) | Destination (under `~/.bicameral/projects//`) | Why it moves | +|---|---|---| +| `ledger.db` (or legacy user-global `~/.bicameral/ledger.db`) | `ledger.db` | The decision store. | +| `local/code-graph.db` (+ `-shm`, `-wal`) | `code-graph.db` (+ siblings) | Symbol index. | +| `local/bm25_index.pkl` | `bm25_index.pkl` | BM25 index over the same symbols. | +| `local/watermark` | `watermark` | Per-peer event-replay offsets. | +| `pending-transcripts/*.jsonl` | `pending-transcripts/*.jsonl` | SessionEnd-hook queue; per-worktree today, project-scoped after. | +| `processed-transcripts/*.jsonl` | `processed-transcripts/*.jsonl` | Sibling to pending. | + +**R4 addition** — partitioning existing `/.bicameral/config.yaml`: + +| Legacy key (in `/.bicameral/config.yaml`) | R4 destination | Routing class | +|---|---|---| +| `mode` | stays | `team` | +| `team.backend`, `team.folder_id`, `team.remote_root` | stays | `team` | +| `ingest_max_bytes` | stays | `team` | +| `telemetry`, `channel`, `guided` | move to `~/.bicameral/projects//operator.yaml` | `operator` | +| `signer_email_fallback`, `render_source_attribution` | move | `operator` | +| `team.author`, `team.role` | move | `operator` | +| `ingest_rate_limit_burst`, `ingest_rate_limit_refill_per_sec` | move | `operator` | +| `query_timeout_read_seconds`, `query_timeout_drift_seconds` | move | `operator` | + +The partition is driven by `context._CONFIG_KEY_ROUTING` (defined in Phase 2 Implementation files). Unknown keys (added by future versions) stay in `config.yaml` with a warning logged — forward-compat for unrecognized keys defers their classification to a later release rather than dropping them. + +**Files that do NOT move** (and why, so the operator's post-flight summary can name them by intent rather than oversight): +| Path | Why we keep it where it is | +|---|---| +| `.bicameral/config.yaml` (trimmed, R4) | Per-clone identity, committed to git. Read directly via `Path(repo) / ".bicameral" / "config.yaml"` (no locator function). Worktree-divergence is treated like any other tracked file — git handles it. Decision: decision:ew9rgegdlblexsraesss. | +| `.bicameral/events/*.jsonl` | Currently committed to git (will change under #373). Leaving them in working tree until #373 lands; until then, worktrees on different branches will see different event-file content — accepted as a transitional limitation, called out in the migrate-state post-flight summary. | + +**R4 post-flight notice** (decision:e3xz4c4ji4x7lm3lvq4k): on every `migrate-state` success, append the following line to the summary block, regardless of detected environment: `"Note: ~/.bicameral/projects/ persists per home directory. If you run bicameral inside an ephemeral container (Codespaces, devcontainer, CI), state will be lost at teardown. Full ephemeral-environment support tracked separately under v0.16.1/v0.17."` Single line; bounded scope; doesn't depend on environment detection (which is the deferred feature). + +### Affected files + +- `cli/migrate_state.py` (new) — argparse-driven CLI: `--repo PATH` (defaults to cwd), `--auto` (non-interactive), `--dry-run`, `--archive-dir PATH`. +- `server.py:1447-1497` (`_register_subparsers`) — register `migrate-state` (and an alias `migrate-ledger` so the issue's verbiage works). +- `server.py:1500-1545` (`_dispatch`) — route to `cli.migrate_state.main`. +- `.claude/skills/bicameral-update/SKILL.md` — append a post-upgrade step: run `bicameral-mcp migrate-state --auto` before reporting "update complete". + +### Unit tests (write first) + +- `tests/test_migrate_state.py::test_moves_ledger_and_code_graph_in_one_pass` — `tmp_path` git repo; pre-create `/.bicameral/ledger.db` (1KB of random bytes) and `/.bicameral/local/code-graph.db{,-shm,-wal}` (mock files); call `migrate_state.main(["--repo", str(repo), "--auto"])`; assert (a) all four files now exist at the locator-resolved project dir, (b) byte-for-byte identical to the originals, (c) the originals are gone, (d) the empty `/.bicameral/local/` was removed. +- `tests/test_migrate_state.py::test_idempotent_second_run_is_noop` — run migrate twice; second invocation reports "nothing to migrate" with exit 0 and does not touch the destination files (compare mtime before/after). +- `tests/test_migrate_state.py::test_collision_archives_destination` — pre-populate both source and destination ledger.db with different content; migrate; assert the destination file got moved to `/ledger.db..bak` before the source replaced it; the source then no longer exists at the source path. +- `tests/test_migrate_state.py::test_dry_run_writes_nothing` — populate sources; `--dry-run`; assert source files unchanged, destination dir does not exist, stdout enumerates the planned operations. +- `tests/test_migrate_state.py::test_missing_source_skips_silently` — repo with no `/.bicameral/ledger.db`; migrate exits 0; stdout says "nothing to migrate"; destination dir is not created. +- `tests/test_migrate_state.py::test_partial_state_migrates_what_exists` — only `ledger.db` exists locally (no code-graph); migrate moves the ledger, skips the code-graph silently, exits 0. +- `tests/test_migrate_state.py::test_auto_flag_skips_prompts` — populate sources; `--auto`; `monkeypatch` `input` to raise (would block on prompt); migration still succeeds (proves the non-interactive path). +- `tests/test_migrate_state.py::test_archive_dir_defaults_under_home` — collision case without `--archive-dir`; archive lands under `~/.bicameral/archive//`. +- `tests/test_migrate_state.py::test_moves_bm25_watermark_and_transcript_queues` — pre-create `local/bm25_index.pkl`, `local/watermark`, `pending-transcripts/sess1.jsonl`, `pending-transcripts/sess2.jsonl`, `processed-transcripts/sess0.jsonl`; run `--auto`; assert all five files land at the locator-resolved project dir with byte-identical content; the source files are gone; the empty `/.bicameral/pending-transcripts/` is removed; the empty `/.bicameral/processed-transcripts/` is removed. +- `tests/test_migrate_state.py::test_legacy_user_global_ledger_moves_on_first_project` — pre-create `~/.bicameral/ledger.db` (no `/.bicameral/ledger.db`); run `--auto` from a `tmp_path` git repo; assert the user-global file moved into `~/.bicameral/projects//ledger.db`; `~/.bicameral/ledger.db` no longer exists. +- `tests/test_migrate_state.py::test_legacy_user_global_ledger_already_claimed_is_noop` — pre-create `~/.bicameral/projects//ledger.db` (some other project already claimed it; `~/.bicameral/ledger.db` is gone); run `--auto` from a fresh repo; assert no error, no move attempt, exit 0. + +### Implementation files + +- `cli/migrate_state.py`: + - `_FILE_SOURCES = [".bicameral/ledger.db", ".bicameral/local/code-graph.db", ".bicameral/local/code-graph.db-shm", ".bicameral/local/code-graph.db-wal", ".bicameral/local/bm25_index.pkl", ".bicameral/local/watermark"]`. + - `_DIR_SOURCES = [".bicameral/pending-transcripts", ".bicameral/processed-transcripts"]` — directory contents move into the project dir's same-named subdir; the directory itself is removed if empty after move. + - `_LEGACY_USER_GLOBAL = Path.home() / ".bicameral" / "ledger.db"` — the v0.15.x user-global ledger (was unscoped). The first project's `migrate-state` invocation claims it; subsequent invocations skip it (already moved or never existed). + - `def _planned_moves(repo: Path) -> list[tuple[Path, Path]]`: pair each present source with the locator-resolved destination via `ledger_locator.project_dir_for(repo)`. Walks files in `_DIR_SOURCES` directories one-by-one. + - `def _archive_existing(dest: Path, archive_dir: Path) -> Path | None`: if dest exists and bytes differ from source, mv to `/..bak`; return the archive path. + - `def _execute(plan, archive_dir, dry_run) -> int`: iterate, archive-on-collision, `shutil.move`; clean up the empty `/.bicameral/local/` and `/.bicameral/pending-transcripts/` directories after success. + - `main(argv)`: argparse → plan → confirm (unless `--auto`) → execute. Exit 0 on success or no-op. + +- `server.py`: + - In `_register_subparsers`: `migrate = subparsers.add_parser("migrate-state", help="move ledger.db + code-graph.db to ~/.bicameral/projects// (v0.16 layout)"); migrate.add_argument(...); subparsers.add_parser("migrate-ledger", help="alias for migrate-state (#368)")`. + - In `_dispatch`: both subcommands route to `cli.migrate_state.main`. + +- `.claude/skills/bicameral-update/SKILL.md`: + - After the existing "pipx/uv install --upgrade" step, append: "Run `bicameral-mcp migrate-state --auto`. Surface the migration summary to the operator. If the binary exits non-zero, surface the stderr verbatim and abort the update flow." + +## Phase 4: `gc` CLI for orphaned project dirs + +After a project is deleted or relocated, its `~/.bicameral/projects//` is orphaned. The garbage collector lists orphans (their `origin.txt` no longer resolves to an existing directory) and prompts before deleting. + +### Affected files + +- `cli/gc.py` (new) — argparse-driven CLI: default = list; `--delete` performs deletion after a per-item prompt. +- `server.py:1447-1497` — register `gc` subparser. +- `server.py:1500-1545` — route. + +### Unit tests (write first) + +- `tests/test_gc.py::test_lists_orphans_and_keeps_live_projects` — seed `~/.bicameral/projects//origin.txt` pointing at an existing `tmp_path` git dir, and `/origin.txt` pointing at a deleted path; `gc.main([])` exits 0; stdout lists exactly `` and does NOT list ``. +- `tests/test_gc.py::test_delete_prompts_per_item_and_removes_confirmed_ones` — two orphans; patched `input` returns `y` for the first and `n` for the second; `--delete`; first project dir is gone, second is intact. +- `tests/test_gc.py::test_delete_with_yes_flag_skips_prompts` — two orphans; `--delete --yes`; both project dirs removed; `input` never called (patched to raise). +- `tests/test_gc.py::test_skips_unreadable_origin_txt_with_warn` — write `origin.txt` as zero bytes; `gc.main([])` logs WARN naming the file, classifies as orphan, exits 0. (Empty origin = unrecoverable; the operator chooses whether `--delete` it.) + +### Implementation files + +- `cli/gc.py`: + - `def _scan(state_root: Path) -> list[tuple[str, Path, str]]`: yields `(project_id, project_dir, status)` for every direct child of `state_root`. `status` is `"live"` / `"orphan"` / `"unreadable"`. + - `def _is_live(origin_path: Path) -> bool`: read `origin.txt`, return True iff the named path exists and is a directory. + - `main(argv)`: argparse; list-mode renders a table; `--delete` iterates the orphans and prompts (unless `--yes`); `shutil.rmtree` on confirm. + +## CI Commands + +- `python -m pytest tests/test_ledger_locator.py tests/test_ledger_locator_origin_guard.py tests/test_ledger_locator_vcs_contract.py -v` — Phase 1: locator module + origin guard + R4 VCS contract. +- `python -m pytest tests/test_ledger_adapter_uses_locator.py tests/test_code_locator_runtime_uses_locator.py tests/test_code_locator_config_none_safe.py tests/test_setup_wizard_omits_state_env_vars.py tests/test_two_worktrees_share_state.py tests/test_config_split.py tests/test_setup_wizard_git_native.py -v` — Phase 2: call-site delegation + worktree regression (ledger + code-graph + bm25 + watermark + pending-transcripts) + R4 config split + R4 git-native wizard. *(R3's `test_config_path_resolution.py` is intentionally absent — R4 retracts the primary-worktree resolution design it tested.)* +- `python -m pytest tests/test_migrate_state.py -v` — Phase 3: migration CLI. +- `python -m pytest tests/test_gc.py -v` — Phase 4: GC CLI. +- `ruff check ledger_locator cli/migrate_state.py cli/gc.py ledger/adapter.py code_locator_runtime.py code_locator/config.py events/materializer.py events/transcript_queue.py scripts/hooks/transcript_archive.py context.py setup_wizard.py` — lint scope matches the diff. +- `mypy ledger_locator cli/migrate_state.py cli/gc.py events/materializer.py events/transcript_queue.py context.py` — type-check new + modified modules (R4 adds context.py routing table to typing scope). +- `python -m pytest tests/ -v` — full suite; the locator changes touch the default DB path so every existing test that constructs an adapter must still pass. From 21e9c861826b951326675e66a082149048e96b3f Mon Sep 17 00:00:00 2001 From: jinhongkuan Date: Mon, 18 May 2026 17:03:30 -0700 Subject: [PATCH 2/8] feat(ledger_locator): operator config path + explicit VCS contract (#368) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 of the Ledger Locator plan (R4-bis PASS). Adds: - `resolve_operator_config_path()` returns `//operator.yaml` for the R4 config split (decision:5nr66wvmapjpt58rrji8). Per-operator keys (telemetry, channel, guided, signer_email_fallback, render_source_attribution, rate-limit knobs, query timeouts) will land here in Phase 2; the locator anchor lands first so consumers have a stable resolution function to import. - Explicit VCS contract in `_project_id.py::common_dir_for` (decision:6c20xahdyxk3suzav4pj). When `git rev-parse --git-common-dir` fails, the raised `ProjectIdResolutionError` now names the assumption verbatim: "bicameral currently supports git only; non-git VCSes are not yet implemented." Forces future ports to jj/sapling/fossil to be a deliberate locator amendment rather than an accidental success on a misclassified VCS. Phase 1 does NOT include: - `resolve_config_path()` — explicitly rejected per R4 (decision:ew9rgegdlblexsraesss; see superseded R3 design at decision:6z39wrjpmmg9vhm8i6t4). Config readers will read `/.bicameral/config.yaml` directly in Phase 2. - Phase 2 call-site delegation (context.py per-key routing, setup_wizard.py config split, git-show-HEAD onboarding detection). - Phase 3 migrate-state CLI / Phase 4 gc CLI. Tests (14 passing): - 8 existing in `test_ledger_locator.py` + 3 in `test_ledger_locator_origin_guard.py` preserved. - 2 new operator-config tests: path under project dir + stability across `git worktree add` checkouts. - 3 new VCS-contract tests in `test_ledger_locator_vcs_contract.py`: verifies the verbatim error message surfaces from `resolve_ledger_url`, `common_dir_for`, and `project_id_for`. ruff: clean. Plan amendment + R4-bis audit gate in the preceding commit. Co-Authored-By: Claude Opus 4.7 (1M context) --- ledger_locator/__init__.py | 118 ++++++++++++++++ ledger_locator/_origin_guard.py | 51 +++++++ ledger_locator/_project_id.py | 53 +++++++ tests/test_ledger_locator.py | 165 ++++++++++++++++++++++ tests/test_ledger_locator_origin_guard.py | 88 ++++++++++++ tests/test_ledger_locator_vcs_contract.py | 69 +++++++++ 6 files changed, 544 insertions(+) create mode 100644 ledger_locator/__init__.py create mode 100644 ledger_locator/_origin_guard.py create mode 100644 ledger_locator/_project_id.py create mode 100644 tests/test_ledger_locator.py create mode 100644 tests/test_ledger_locator_origin_guard.py create mode 100644 tests/test_ledger_locator_vcs_contract.py diff --git a/ledger_locator/__init__.py b/ledger_locator/__init__.py new file mode 100644 index 00000000..3499caf2 --- /dev/null +++ b/ledger_locator/__init__.py @@ -0,0 +1,118 @@ +"""Ledger Locator — resolve where ledger and code-graph state live for a project. + +Public API: + resolve_ledger_url(repo_path=None) -> str + resolve_code_graph_path(repo_path=None) -> Path + project_id_for(repo_path=None) -> str + project_dir_for(repo_path=None) -> Path + +The locator is deterministic: same project, same paths, regardless of which +working tree you call it from. See `docs/architecture/ledger-locator.md` +for the rationale (#368). +""" + +from __future__ import annotations + +import os +from pathlib import Path + +from ._origin_guard import ProjectIdCollisionError, assert_origin +from ._project_id import ProjectIdResolutionError, common_dir_for +from ._project_id import project_id_for as _hash_id + +__all__ = [ + "ProjectIdCollisionError", + "ProjectIdResolutionError", + "STATE_ROOT", + "project_dir_for", + "project_id_for", + "resolve_code_graph_path", + "resolve_ledger_url", + "resolve_operator_config_path", +] + +STATE_ROOT = Path.home() / ".bicameral" / "projects" + + +def _resolve_repo(repo_path: Path | None) -> Path: + return Path(repo_path) if repo_path is not None else Path.cwd() + + +def project_id_for(repo_path: Path | None = None) -> str: + """Return the 16-char hex project id for the repo at `repo_path`. + + `repo_path` defaults to the current working directory. Raises + `ProjectIdResolutionError` if the path is not inside a git work tree. + """ + return _hash_id(_resolve_repo(repo_path)) + + +def project_dir_for(repo_path: Path | None = None) -> Path: + """Return the per-project state directory under `STATE_ROOT`.""" + return STATE_ROOT / project_id_for(repo_path) + + +def resolve_ledger_url(repo_path: Path | None = None) -> str: + """Return the SurrealDB URL for this project's ledger. + + Resolution order: + 1. `SURREAL_URL` env-var (unconditional override). + 2. `surrealkv:////ledger.db`. + + Writes / verifies `/origin.txt` on the default path + (skipped when the env override is in effect — the caller has taken + explicit responsibility for the URL). + """ + override = os.environ.get("SURREAL_URL") + if override: + return override + + repo = _resolve_repo(repo_path) + common = common_dir_for(repo) # raises ProjectIdResolutionError + project_dir = STATE_ROOT / _hash_id(repo) + assert_origin(project_dir, common) + return f"surrealkv://{project_dir / 'ledger.db'}" + + +def resolve_code_graph_path(repo_path: Path | None = None) -> Path: + """Return the on-disk path for this project's code-graph SQLite file. + + Resolution order: + 1. `CODE_LOCATOR_SQLITE_DB` env-var (unconditional override). + 2. `//code-graph.db`. + + Like `resolve_ledger_url`, writes / verifies `origin.txt` on the + default path. + """ + override = os.environ.get("CODE_LOCATOR_SQLITE_DB") + if override: + return Path(override) + + repo = _resolve_repo(repo_path) + common = common_dir_for(repo) + project_dir = STATE_ROOT / _hash_id(repo) + assert_origin(project_dir, common) + return project_dir / "code-graph.db" + + +def resolve_operator_config_path(repo_path: Path | None = None) -> Path: + """Return the on-disk path for this project's per-operator config file. + + Per R4 amendment (decision:5nr66wvmapjpt58rrji8): per-operator keys + (telemetry, channel, guided, signer_email_fallback, + render_source_attribution, team.author, team.role, rate-limit knobs, + query timeouts) live at `~/.bicameral/projects//operator.yaml`, + not in the git-committed `/.bicameral/config.yaml`. + + Per-machine, project-scoped, shared across worktrees on the same + machine. No env-var override — operator.yaml is the operator's own + state, not user-overridable per call. + + Writes / verifies `origin.txt` on first resolution (same as + `resolve_ledger_url` and `resolve_code_graph_path`). + """ + repo = _resolve_repo(repo_path) + common = common_dir_for(repo) + project_dir = STATE_ROOT / _hash_id(repo) + assert_origin(project_dir, common) + return project_dir / "operator.yaml" diff --git a/ledger_locator/_origin_guard.py b/ledger_locator/_origin_guard.py new file mode 100644 index 00000000..0f12365f --- /dev/null +++ b/ledger_locator/_origin_guard.py @@ -0,0 +1,51 @@ +"""Origin guard — refuses to use a project dir keyed off a different common-dir. + +The 16-char project id is sha256-derived; collisions in practice are +vanishingly unlikely but not zero. The guard records the common-dir on +first use and refuses to proceed when a later call presents a different +one. Operators can bypass via `BICAMERAL_LOCATOR_ALLOW_COLLISION=1`, +which downgrades the refusal to a logged WARN. +""" + +from __future__ import annotations + +import logging +import os +from pathlib import Path + +logger = logging.getLogger(__name__) + +_ALLOW_COLLISION_ENV = "BICAMERAL_LOCATOR_ALLOW_COLLISION" + + +class ProjectIdCollisionError(RuntimeError): + """Two distinct projects hashed to the same 16-char id.""" + + +def assert_origin(project_dir: Path, common_dir: Path) -> None: + """Record or verify `/origin.txt`. + + First call writes the common-dir path; subsequent calls compare and + raise on mismatch (or WARN + proceed when the override env is set). + """ + project_dir.mkdir(parents=True, exist_ok=True) + origin_file = project_dir / "origin.txt" + expected = str(common_dir) + + if not origin_file.exists(): + origin_file.write_text(expected + "\n") + return + + recorded = origin_file.read_text().strip() + if recorded == expected: + return + + msg = ( + f"project-id collision detected at {project_dir}: " + f"recorded origin {recorded!r}, current origin {expected!r}. " + f"Set {_ALLOW_COLLISION_ENV}=1 to proceed anyway." + ) + if os.environ.get(_ALLOW_COLLISION_ENV) == "1": + logger.warning(msg) + return + raise ProjectIdCollisionError(msg) diff --git a/ledger_locator/_project_id.py b/ledger_locator/_project_id.py new file mode 100644 index 00000000..8ea3df9f --- /dev/null +++ b/ledger_locator/_project_id.py @@ -0,0 +1,53 @@ +"""Project-id derivation — sha256 over the absolute git common-dir path. + +A "project" is one git object database. Worktrees share it; clones don't. +""" + +from __future__ import annotations + +import hashlib +import subprocess +from pathlib import Path + + +class ProjectIdResolutionError(RuntimeError): + """Raised when we cannot derive a project id from the given path. + + Usually because the path is not inside a git work tree. The message + names both the missing `.git/` and the env-var escape hatch so the + operator can recover without reading source. + """ + + +def common_dir_for(repo_path: Path) -> Path: + """Return the absolute path to the repo's shared git directory. + + For a primary working tree this is `/.git`. For a linked + worktree (`git worktree add`), it's the primary's `.git/` — which is + why worktrees of the same project share a project id. + """ + try: + result = subprocess.run( + ["git", "rev-parse", "--git-common-dir"], + cwd=repo_path, + capture_output=True, + text=True, + check=True, + ) + except (subprocess.CalledProcessError, FileNotFoundError) as exc: + raise ProjectIdResolutionError( + f"could not resolve .git/ for {repo_path}: bicameral currently supports git only; " + "non-git VCSes are not yet implemented. To use bicameral with this repo, run from " + "inside a git working tree, OR set SURREAL_URL and CODE_LOCATOR_SQLITE_DB " + "explicitly to bypass the locator." + ) from exc + + raw = result.stdout.strip() + candidate = Path(raw) if Path(raw).is_absolute() else repo_path / raw + return candidate.resolve() + + +def project_id_for(repo_path: Path) -> str: + """Return the 16-char hex project id for the repo at `repo_path`.""" + common = common_dir_for(repo_path) + return hashlib.sha256(str(common).encode()).hexdigest()[:16] diff --git a/tests/test_ledger_locator.py b/tests/test_ledger_locator.py new file mode 100644 index 00000000..1a91b6e6 --- /dev/null +++ b/tests/test_ledger_locator.py @@ -0,0 +1,165 @@ +"""Unit tests for `ledger_locator` — #368 Phase 1. + +The locator resolves where ledger and code-graph state live for a project. +Sociable: real git binary, real filesystem. No mocks of the unit under test. +""" + +from __future__ import annotations + +import os +import subprocess +from pathlib import Path + +import pytest + + +@pytest.fixture +def git_repo(tmp_path: Path) -> Path: + """Initialize a fresh git repo at tmp_path and return it.""" + subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True) + return tmp_path + + +@pytest.fixture(autouse=True) +def _clear_locator_env(monkeypatch): + """Each test starts with a clean env so an outer SURREAL_URL or + CODE_LOCATOR_SQLITE_DB doesn't leak into the locator's resolution. + """ + monkeypatch.delenv("SURREAL_URL", raising=False) + monkeypatch.delenv("CODE_LOCATOR_SQLITE_DB", raising=False) + monkeypatch.delenv("BICAMERAL_LOCATOR_ALLOW_COLLISION", raising=False) + + +def test_default_resolves_under_home_bicameral_projects(git_repo: Path) -> None: + import ledger_locator + + url = ledger_locator.resolve_ledger_url(repo_path=git_repo) + code_graph = ledger_locator.resolve_code_graph_path(repo_path=git_repo) + + home = Path(os.environ["HOME"]) + expected_prefix = home / ".bicameral" / "projects" + + assert url.startswith("surrealkv://"), url + assert str(expected_prefix) in url + assert url.endswith("/ledger.db") + + assert str(code_graph).startswith(str(expected_prefix)) + assert code_graph.name == "code-graph.db" + + # Same project — ledger and code-graph live side by side. + assert code_graph.parent == Path(url[len("surrealkv://"):]).parent + + +def test_env_override_wins_for_ledger_only(git_repo: Path, monkeypatch) -> None: + import ledger_locator + + monkeypatch.setenv("SURREAL_URL", "memory://") + + url = ledger_locator.resolve_ledger_url(repo_path=git_repo) + code_graph = ledger_locator.resolve_code_graph_path(repo_path=git_repo) + + assert url == "memory://" + # code-graph still on disk under the project dir. + assert code_graph.name == "code-graph.db" + assert "/.bicameral/projects/" in str(code_graph) + + +def test_env_override_wins_for_code_graph_only(git_repo: Path, tmp_path: Path, monkeypatch) -> None: + import ledger_locator + + explicit = tmp_path / "explicit.db" + monkeypatch.setenv("CODE_LOCATOR_SQLITE_DB", str(explicit)) + + url = ledger_locator.resolve_ledger_url(repo_path=git_repo) + code_graph = ledger_locator.resolve_code_graph_path(repo_path=git_repo) + + assert code_graph == explicit + # Ledger still resolves to its default home-relative path. + assert url.startswith("surrealkv://") + assert "/.bicameral/projects/" in url + + +def test_two_worktrees_resolve_to_same_id(git_repo: Path, tmp_path: Path) -> None: + import ledger_locator + + # Need an initial commit before `git worktree add`. + subprocess.run(["git", "commit", "--allow-empty", "-q", "-m", "init"], cwd=git_repo, check=True) + worktree = tmp_path / "wt2" + subprocess.run( + ["git", "worktree", "add", "-q", "--detach", str(worktree)], + cwd=git_repo, + check=True, + ) + + primary_id = ledger_locator.project_id_for(repo_path=git_repo) + secondary_id = ledger_locator.project_id_for(repo_path=worktree) + + assert primary_id == secondary_id + assert len(primary_id) == 16 + assert all(c in "0123456789abcdef" for c in primary_id) + + +def test_separate_clones_resolve_to_different_ids(tmp_path: Path) -> None: + import ledger_locator + + repo_a = tmp_path / "a" + repo_b = tmp_path / "b" + for r in (repo_a, repo_b): + r.mkdir() + subprocess.run(["git", "init", "-q"], cwd=r, check=True) + + assert ledger_locator.project_id_for(repo_path=repo_a) != ledger_locator.project_id_for( + repo_path=repo_b + ) + + +def test_non_git_directory_raises_actionable_error(tmp_path: Path) -> None: + import ledger_locator + + with pytest.raises(ledger_locator.ProjectIdResolutionError) as exc: + ledger_locator.resolve_ledger_url(repo_path=tmp_path) + + msg = str(exc.value) + # Names the missing .git/ AND points at the env-var override. + assert ".git" in msg + assert "SURREAL_URL" in msg + + +def test_resolve_operator_config_path_under_project_dir(git_repo: Path) -> None: + """R4 (decision:5nr66wvmapjpt58rrji8): per-operator config lives at + `~/.bicameral/projects//operator.yaml` — sibling to ledger.db and + code-graph.db. Per-machine, project-scoped, shared across worktrees on + the same machine. No env-var override. + """ + import ledger_locator + + operator_path = ledger_locator.resolve_operator_config_path(repo_path=git_repo) + code_graph = ledger_locator.resolve_code_graph_path(repo_path=git_repo) + + # Lives under the same project dir as code-graph.db (one bag of state). + assert operator_path.parent == code_graph.parent + assert operator_path.name == "operator.yaml" + + home = Path(os.environ["HOME"]) + assert str(home / ".bicameral" / "projects") in str(operator_path) + + +def test_resolve_operator_config_path_stable_across_worktrees( + git_repo: Path, tmp_path: Path +) -> None: + """R4: same project + different worktrees → same operator.yaml path.""" + import ledger_locator + + subprocess.run( + ["git", "commit", "--allow-empty", "-q", "-m", "init"], cwd=git_repo, check=True + ) + worktree = tmp_path / "wt2" + subprocess.run( + ["git", "worktree", "add", "-q", "--detach", str(worktree)], + cwd=git_repo, + check=True, + ) + + primary_op = ledger_locator.resolve_operator_config_path(repo_path=git_repo) + secondary_op = ledger_locator.resolve_operator_config_path(repo_path=worktree) + assert primary_op == secondary_op diff --git a/tests/test_ledger_locator_origin_guard.py b/tests/test_ledger_locator_origin_guard.py new file mode 100644 index 00000000..76511997 --- /dev/null +++ b/tests/test_ledger_locator_origin_guard.py @@ -0,0 +1,88 @@ +"""Unit tests for `ledger_locator` origin-guard — #368 Phase 1. + +The origin guard writes `/origin.txt` on first resolve and refuses +subsequent resolves when the recorded origin disagrees with the current one +(modulo a documented env-var escape hatch). +""" + +from __future__ import annotations + +import logging +import os +import subprocess +from pathlib import Path + +import pytest + + +@pytest.fixture +def git_repo(tmp_path: Path) -> Path: + subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True) + return tmp_path + + +@pytest.fixture(autouse=True) +def _clear_locator_env(monkeypatch): + monkeypatch.delenv("SURREAL_URL", raising=False) + monkeypatch.delenv("CODE_LOCATOR_SQLITE_DB", raising=False) + monkeypatch.delenv("BICAMERAL_LOCATOR_ALLOW_COLLISION", raising=False) + + +def test_first_resolve_writes_origin_txt(git_repo: Path) -> None: + import ledger_locator + + ledger_locator.resolve_ledger_url(repo_path=git_repo) + + project_dir = ledger_locator.project_dir_for(repo_path=git_repo) + origin_file = project_dir / "origin.txt" + + assert origin_file.exists() + content = origin_file.read_text().strip() + + # The content is the absolute common-dir path for the repo. + common_dir = subprocess.run( + ["git", "rev-parse", "--git-common-dir"], + cwd=git_repo, + capture_output=True, + text=True, + check=True, + ).stdout.strip() + expected = str(Path(common_dir if Path(common_dir).is_absolute() else git_repo / common_dir).resolve()) + assert content == expected + + +def test_collision_with_different_origin_raises(git_repo: Path) -> None: + import ledger_locator + + project_dir = ledger_locator.project_dir_for(repo_path=git_repo) + project_dir.mkdir(parents=True, exist_ok=True) + (project_dir / "origin.txt").write_text("/somewhere/else/.git\n") + + with pytest.raises(ledger_locator.ProjectIdCollisionError) as exc: + ledger_locator.resolve_ledger_url(repo_path=git_repo) + + msg = str(exc.value) + # Surfaces BOTH paths so the operator can triage. + assert "/somewhere/else/.git" in msg + assert str(git_repo) in msg or ".git" in msg + + +def test_collision_override_logs_and_proceeds( + git_repo: Path, monkeypatch, caplog: pytest.LogCaptureFixture +) -> None: + import ledger_locator + + project_dir = ledger_locator.project_dir_for(repo_path=git_repo) + project_dir.mkdir(parents=True, exist_ok=True) + (project_dir / "origin.txt").write_text("/somewhere/else/.git\n") + + monkeypatch.setenv("BICAMERAL_LOCATOR_ALLOW_COLLISION", "1") + + with caplog.at_level(logging.WARNING, logger="ledger_locator"): + url = ledger_locator.resolve_ledger_url(repo_path=git_repo) + + assert url.startswith("surrealkv://") + warns = [r for r in caplog.records if r.levelno >= logging.WARNING] + assert any("/somewhere/else/.git" in r.getMessage() for r in warns), ( + f"expected a WARN naming the foreign origin; got: {[r.getMessage() for r in warns]}" + ) diff --git a/tests/test_ledger_locator_vcs_contract.py b/tests/test_ledger_locator_vcs_contract.py new file mode 100644 index 00000000..9c320d9f --- /dev/null +++ b/tests/test_ledger_locator_vcs_contract.py @@ -0,0 +1,69 @@ +"""Tests for the explicit VCS contract surfaced from `ledger_locator/_project_id.py`. + +R4 amendment (#368, decision:6c20xahdyxk3suzav4pj): bicameral's git-only +assumption must be surfaced as an explicit error message when `git +rev-parse --git-common-dir` fails — not as an opaque +subprocess.CalledProcessError. Forces future ports to jj / sapling / +fossil to be a deliberate locator amendment rather than an accidental +success on a misclassified VCS. +""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + + +@pytest.fixture(autouse=True) +def _clear_locator_env(monkeypatch): + monkeypatch.delenv("SURREAL_URL", raising=False) + monkeypatch.delenv("CODE_LOCATOR_SQLITE_DB", raising=False) + monkeypatch.delenv("BICAMERAL_LOCATOR_ALLOW_COLLISION", raising=False) + + +def test_non_git_directory_raises_with_vcs_message(tmp_path: Path) -> None: + """R4 (decision:6c20xahdyxk3suzav4pj): the error names the VCS + assumption verbatim so an operator on jj / sapling / fossil knows + why their tool fails, and what they'd need to amend to support it. + """ + import ledger_locator + + with pytest.raises(ledger_locator.ProjectIdResolutionError) as exc: + ledger_locator.resolve_ledger_url(repo_path=tmp_path) + + msg = str(exc.value) + # The verbatim contract phrase the plan promises. + assert "bicameral currently supports git only" in msg + assert "non-git VCSes are not yet implemented" in msg + # The actionable next step is also present. + assert "git working tree" in msg + + +def test_common_dir_for_raises_same_message(tmp_path: Path) -> None: + """The error surfaces from the lowest-level locator helper too, + not only from `resolve_ledger_url`. Callers using `common_dir_for` + directly (e.g. `project_id_for`) get the same explicit message. + """ + from ledger_locator._project_id import ( + ProjectIdResolutionError, + common_dir_for, + ) + + with pytest.raises(ProjectIdResolutionError) as exc: + common_dir_for(tmp_path) + + msg = str(exc.value) + assert "bicameral currently supports git only" in msg + + +def test_project_id_for_propagates_vcs_message(tmp_path: Path) -> None: + """`project_id_for` calls `common_dir_for` internally — the VCS + contract error must propagate without being swallowed or rewrapped. + """ + import ledger_locator + + with pytest.raises(ledger_locator.ProjectIdResolutionError) as exc: + ledger_locator.project_id_for(repo_path=tmp_path) + + assert "bicameral currently supports git only" in str(exc.value) From a0bbf1669536efac29a6d7d445acadcfe9ef1d0e Mon Sep 17 00:00:00 2001 From: jinhongkuan Date: Mon, 18 May 2026 17:29:29 -0700 Subject: [PATCH 3/8] feat(ledger_locator): add derived-state path resolvers (bm25, watermark, transcripts) (#368) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2A of the Ledger Locator plan (R4-bis PASS). Adds four R3 locator functions for project-scoped derived state — sibling to ledger.db and code-graph.db under `~/.bicameral/projects//`. These enable Phase 2B to delegate the corresponding call sites in events/, code_locator/, and scripts/hooks/ to the locator. Added: - `resolve_bm25_index_path()` — derived from code-graph; sibling to it. - `resolve_watermark_path()` — replaces `events/materializer.py`'s `local_dir/"watermark"`. Fixes per-worktree re-replay of peer events. - `resolve_pending_transcripts_dir()` — replaces `events/transcript_queue.py:_pending_root`. Fixes per-worktree invisibility of SessionEnd-hook transcripts. - `resolve_processed_transcripts_dir()` — sibling to pending. Refactor: - Extracted `_resolved_project_dir(repo_path)` private helper. The resolve-repo + assert-origin pattern was duplicated across 3 public resolvers; adding 4 more would push it to 7. Helper centralizes the pipeline (repo → project-id → origin-guard) so each public resolver is now a one-line return. Net diff is line-neutral for existing resolvers; new ones get 2 lines each instead of 5. Tests (17 passing, +3 from Phase 1): - `test_resolves_derived_state_paths_under_project_dir` — all four new paths share the same project dir as code-graph.db. - `test_derived_state_paths_stable_across_worktrees` — paths are identical across `git worktree add` checkouts (the whole point of project-scoping derived state). - `test_derived_state_paths_have_no_env_override` — unrelated env overrides (SURREAL_URL, CODE_LOCATOR_SQLITE_DB) do not leak into these paths; they always resolve to the project dir. ruff: clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- ledger_locator/__init__.py | 91 +++++++++++++++++++++++++++--------- tests/test_ledger_locator.py | 74 +++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+), 21 deletions(-) diff --git a/ledger_locator/__init__.py b/ledger_locator/__init__.py index 3499caf2..1cbada98 100644 --- a/ledger_locator/__init__.py +++ b/ledger_locator/__init__.py @@ -3,6 +3,11 @@ Public API: resolve_ledger_url(repo_path=None) -> str resolve_code_graph_path(repo_path=None) -> Path + resolve_bm25_index_path(repo_path=None) -> Path + resolve_watermark_path(repo_path=None) -> Path + resolve_pending_transcripts_dir(repo_path=None) -> Path + resolve_processed_transcripts_dir(repo_path=None) -> Path + resolve_operator_config_path(repo_path=None) -> Path project_id_for(repo_path=None) -> str project_dir_for(repo_path=None) -> Path @@ -26,9 +31,13 @@ "STATE_ROOT", "project_dir_for", "project_id_for", + "resolve_bm25_index_path", "resolve_code_graph_path", "resolve_ledger_url", "resolve_operator_config_path", + "resolve_pending_transcripts_dir", + "resolve_processed_transcripts_dir", + "resolve_watermark_path", ] STATE_ROOT = Path.home() / ".bicameral" / "projects" @@ -38,6 +47,21 @@ def _resolve_repo(repo_path: Path | None) -> Path: return Path(repo_path) if repo_path is not None else Path.cwd() +def _resolved_project_dir(repo_path: Path | None) -> Path: + """Resolve the project dir + assert origin in one shot. + + Used by every `resolve_*` function that returns a path under the + project state dir. Centralizes the resolve-then-verify pattern so the + public API stays a thin layer on top of the (repo → project-id → + origin-guard) pipeline. + """ + repo = _resolve_repo(repo_path) + common = common_dir_for(repo) # raises ProjectIdResolutionError + project_dir = STATE_ROOT / _hash_id(repo) + assert_origin(project_dir, common) + return project_dir + + def project_id_for(repo_path: Path | None = None) -> str: """Return the 16-char hex project id for the repo at `repo_path`. @@ -48,7 +72,11 @@ def project_id_for(repo_path: Path | None = None) -> str: def project_dir_for(repo_path: Path | None = None) -> Path: - """Return the per-project state directory under `STATE_ROOT`.""" + """Return the per-project state directory under `STATE_ROOT`. + + Does NOT verify the origin guard (see `_resolved_project_dir` for the + verifying variant used internally by every `resolve_*` function). + """ return STATE_ROOT / project_id_for(repo_path) @@ -66,12 +94,7 @@ def resolve_ledger_url(repo_path: Path | None = None) -> str: override = os.environ.get("SURREAL_URL") if override: return override - - repo = _resolve_repo(repo_path) - common = common_dir_for(repo) # raises ProjectIdResolutionError - project_dir = STATE_ROOT / _hash_id(repo) - assert_origin(project_dir, common) - return f"surrealkv://{project_dir / 'ledger.db'}" + return f"surrealkv://{_resolved_project_dir(repo_path) / 'ledger.db'}" def resolve_code_graph_path(repo_path: Path | None = None) -> Path: @@ -87,18 +110,51 @@ def resolve_code_graph_path(repo_path: Path | None = None) -> Path: override = os.environ.get("CODE_LOCATOR_SQLITE_DB") if override: return Path(override) + return _resolved_project_dir(repo_path) / "code-graph.db" - repo = _resolve_repo(repo_path) - common = common_dir_for(repo) - project_dir = STATE_ROOT / _hash_id(repo) - assert_origin(project_dir, common) - return project_dir / "code-graph.db" + +def resolve_bm25_index_path(repo_path: Path | None = None) -> Path: + """Return the on-disk path for this project's BM25 token index. + + R3 addition (#368): the BM25 index is derived from the same symbol + corpus as `code-graph.db` and must travel with it. Sibling under the + project dir, no env-var override. + """ + return _resolved_project_dir(repo_path) / "bm25_index.pkl" + + +def resolve_watermark_path(repo_path: Path | None = None) -> Path: + """Return the on-disk path for this project's peer-event watermark. + + R3 addition (#368): replaces `events/materializer.py`'s + `local_dir / "watermark"`. Per-peer event-replay offsets are + project-scoped so worktrees do not re-replay peer JSONL N times. + """ + return _resolved_project_dir(repo_path) / "watermark" + + +def resolve_pending_transcripts_dir(repo_path: Path | None = None) -> Path: + """Return the on-disk dir for this project's pending-transcripts queue. + + R3 addition (#368): replaces `events/transcript_queue.py:_pending_root`. + SessionEnd-hook queue is project-scoped so transcript ingested in + worktree A is visible to worktree B's drain loop. + """ + return _resolved_project_dir(repo_path) / "pending-transcripts" + + +def resolve_processed_transcripts_dir(repo_path: Path | None = None) -> Path: + """Return the on-disk dir for this project's processed-transcripts archive. + + R3 addition (#368): sibling to pending-transcripts; also project-scoped. + """ + return _resolved_project_dir(repo_path) / "processed-transcripts" def resolve_operator_config_path(repo_path: Path | None = None) -> Path: """Return the on-disk path for this project's per-operator config file. - Per R4 amendment (decision:5nr66wvmapjpt58rrji8): per-operator keys + R4 amendment (decision:5nr66wvmapjpt58rrji8): per-operator keys (telemetry, channel, guided, signer_email_fallback, render_source_attribution, team.author, team.role, rate-limit knobs, query timeouts) live at `~/.bicameral/projects//operator.yaml`, @@ -107,12 +163,5 @@ def resolve_operator_config_path(repo_path: Path | None = None) -> Path: Per-machine, project-scoped, shared across worktrees on the same machine. No env-var override — operator.yaml is the operator's own state, not user-overridable per call. - - Writes / verifies `origin.txt` on first resolution (same as - `resolve_ledger_url` and `resolve_code_graph_path`). """ - repo = _resolve_repo(repo_path) - common = common_dir_for(repo) - project_dir = STATE_ROOT / _hash_id(repo) - assert_origin(project_dir, common) - return project_dir / "operator.yaml" + return _resolved_project_dir(repo_path) / "operator.yaml" diff --git a/tests/test_ledger_locator.py b/tests/test_ledger_locator.py index 1a91b6e6..709caa8a 100644 --- a/tests/test_ledger_locator.py +++ b/tests/test_ledger_locator.py @@ -163,3 +163,77 @@ def test_resolve_operator_config_path_stable_across_worktrees( primary_op = ledger_locator.resolve_operator_config_path(repo_path=git_repo) secondary_op = ledger_locator.resolve_operator_config_path(repo_path=worktree) assert primary_op == secondary_op + + +def test_resolves_derived_state_paths_under_project_dir(git_repo: Path) -> None: + """R3 (#368): bm25 index, watermark, and transcript queues all share + the same project dir as code-graph.db (one project, one bag of state). + """ + import ledger_locator + + code_graph = ledger_locator.resolve_code_graph_path(repo_path=git_repo) + bm25 = ledger_locator.resolve_bm25_index_path(repo_path=git_repo) + watermark = ledger_locator.resolve_watermark_path(repo_path=git_repo) + pending = ledger_locator.resolve_pending_transcripts_dir(repo_path=git_repo) + processed = ledger_locator.resolve_processed_transcripts_dir(repo_path=git_repo) + + project_dir = code_graph.parent + assert bm25 == project_dir / "bm25_index.pkl" + assert watermark == project_dir / "watermark" + assert pending == project_dir / "pending-transcripts" + assert processed == project_dir / "processed-transcripts" + + +def test_derived_state_paths_stable_across_worktrees( + git_repo: Path, tmp_path: Path +) -> None: + """R3: derived-state paths must be identical across worktrees of one + project (this is the whole point of project-scoping them). + """ + import ledger_locator + + subprocess.run( + ["git", "commit", "--allow-empty", "-q", "-m", "init"], cwd=git_repo, check=True + ) + worktree = tmp_path / "wt2" + subprocess.run( + ["git", "worktree", "add", "-q", "--detach", str(worktree)], + cwd=git_repo, + check=True, + ) + + for resolver in ( + "resolve_bm25_index_path", + "resolve_watermark_path", + "resolve_pending_transcripts_dir", + "resolve_processed_transcripts_dir", + ): + fn = getattr(ledger_locator, resolver) + assert fn(repo_path=git_repo) == fn(repo_path=worktree), resolver + + +def test_derived_state_paths_have_no_env_override( + git_repo: Path, monkeypatch +) -> None: + """R3: derived-state paths are NOT user-overridable per call (unlike + ledger.db via SURREAL_URL and code-graph.db via CODE_LOCATOR_SQLITE_DB). + Setting unrelated overrides must not leak into these paths. + """ + import ledger_locator + + # Simulate an env where ledger + code-graph are overridden — derived + # state paths still resolve to the project dir, not anywhere else. + monkeypatch.setenv("SURREAL_URL", "memory://") + monkeypatch.setenv("CODE_LOCATOR_SQLITE_DB", "/tmp/x.db") + + project_dir = ledger_locator.project_dir_for(repo_path=git_repo) + assert ledger_locator.resolve_bm25_index_path(repo_path=git_repo).parent == project_dir + assert ledger_locator.resolve_watermark_path(repo_path=git_repo).parent == project_dir + assert ( + ledger_locator.resolve_pending_transcripts_dir(repo_path=git_repo).parent + == project_dir + ) + assert ( + ledger_locator.resolve_processed_transcripts_dir(repo_path=git_repo).parent + == project_dir + ) From 9aafb7959de66070f23b45fec1d74d0276a159c3 Mon Sep 17 00:00:00 2001 From: jinhongkuan Date: Mon, 18 May 2026 17:41:30 -0700 Subject: [PATCH 4/8] feat(ledger_locator): delegate ledger/code-graph paths to locator (Phase 2B-i) (#368) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2B (call-site delegation), part i: the three load-bearing state-path call sites stop computing paths inline and delegate to the Ledger Locator. Phase 2B-ii (events/materializer, transcript_queue) and Phase 2B-iii (setup_wizard env writes) follow in subsequent commits. Delegations: - `ledger/adapter.py::_default_db_url` now returns `ledger_locator.resolve_ledger_url()`. The hard-coded `~/.bicameral/ledger.db` literal is gone; the SURREAL_URL env override flows through the locator's single resolution path. Implements decision:ko8efq3z1zwhbof7kecq + decision:c2eqcwimhe4lpaexrddw at the adapter boundary. - `code_locator_runtime.ensure_runtime_env` calls `resolve_code_graph_path()` instead of computing `/.bicameral/code-graph.db` from the REPO_PATH env. The vestigial `_default_cache_root` helper is deleted. Outside a git repo the locator's `ProjectIdResolutionError` is caught and the env is left unset — the None-safe `code_locator.config.resolve_paths()` then handles direct-construction fallback (or raises, see below). - `code_locator_runtime.rebuild_index` line 277: `bm25_path` now comes from `resolve_bm25_index_path()`. Removes the implicit "bm25 lives next to sqlite_db" coupling — both paths come from the locator's project dir independently, matching the R3 plan's "one bag of state" intent. - `code_locator/config.py`: `sqlite_db` default is `None` (was the literal `~/.bicameral/code-graph.db`). `resolve_paths()` is None-safe and defers to `resolve_code_graph_path()` when set to None. Outside a git repo with no `CODE_LOCATOR_SQLITE_DB` override, the locator's `ProjectIdResolutionError` propagates verbatim ("bicameral currently supports git only"). Per decision:c2eqcwimhe4lpaexrddw, behavior is undefined in unsupported environments; naming the problem is better than writing to a hardcoded fallback that drifts from the canonical layout (and isn't Windows-friendly to begin with). Tests (11 new + 17 retained = 28 passing): - `test_ledger_adapter_uses_locator.py` — 3 tests cover the adapter default, SURREAL_URL override, and the regression guard against the legacy un-project-scoped path. - `test_code_locator_runtime_uses_locator.py` — 3 tests cover the env pre-population, the setdefault preservation, and the silent outside-git fallback. - `test_code_locator_config_none_safe.py` — 5 tests cover load_config end-to-end, direct-construction None-safety, env-var precedence, the outside-git error propagation, and the env-override escape hatch for test fixtures. Section 4 razor (advisory from R4 audit): the resolve-repo + assert- origin pattern was duplicated in 3 public resolvers; adding 4 more (Phase 2A) would push it to 7. Extracted `_resolved_project_dir` helper to centralize the pipeline. Net effect is line-neutral on the existing resolvers and removes ~12 lines of duplication. Outside scope: the broader pytest suite has 41 pre-existing failures unrelated to this commit (verified: `test_v0417_jargon_hygiene` fails on the clean dev tree too). They will be addressed separately. ruff: clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- code_locator/config.py | 31 ++++- code_locator_runtime.py | 38 +++--- ledger/adapter.py | 12 +- tests/test_code_locator_config_none_safe.py | 115 ++++++++++++++++++ .../test_code_locator_runtime_uses_locator.py | 76 ++++++++++++ tests/test_ledger_adapter_uses_locator.py | 77 ++++++++++++ 6 files changed, 328 insertions(+), 21 deletions(-) create mode 100644 tests/test_code_locator_config_none_safe.py create mode 100644 tests/test_code_locator_runtime_uses_locator.py create mode 100644 tests/test_ledger_adapter_uses_locator.py diff --git a/code_locator/config.py b/code_locator/config.py index a2ee6dfe..817c5141 100644 --- a/code_locator/config.py +++ b/code_locator/config.py @@ -13,8 +13,11 @@ class CodeLocatorConfig: """Code Locator configuration.""" - # Storage - sqlite_db: str = "~/.bicameral/code-graph.db" + # Storage. #368 Phase 2B — default is None; resolve_paths() substitutes + # the locator-resolved path on construction. Direct-construction callers + # that bypass `load_config()` still get the locator default via + # resolve_paths(). The legacy `~/.bicameral/code-graph.db` literal is gone. + sqlite_db: str | None = None # Indexing backend — "legacy" (tree-sitter + sqlite) or "cocoindex" # (tree-sitter via cocoindex pipeline, writes to same sqlite_db). @@ -34,8 +37,28 @@ class CodeLocatorConfig: min_candidate_length: int = 2 def resolve_paths(self) -> CodeLocatorConfig: - """Expand ~ in all path fields.""" - self.sqlite_db = str(Path(self.sqlite_db).expanduser()) + """Resolve all path fields. #368 Phase 2B — None-safe; locator-only. + + When `sqlite_db is None`, defers to + `ledger_locator.resolve_code_graph_path()`. Belt-and-braces guard + against direct-construction callers that bypass `load_config()`. + + No legacy / cross-platform fallback. Per decision:c2eqcwimhe4lpaexrddw + ("Users in unsupported environments must set SURREAL_URL explicitly; + behavior is otherwise undefined"), callers outside a git repo with + no `CODE_LOCATOR_SQLITE_DB` env override get a + `ProjectIdResolutionError` propagated from the locator — naming + the actual problem (not-a-git-repo) rather than silently writing + to a parallel hardcoded path that drifts from the locator's + canonical layout. Tests that need a custom path must set + `CODE_LOCATOR_SQLITE_DB` explicitly. + """ + if self.sqlite_db is None: + from ledger_locator import resolve_code_graph_path + + self.sqlite_db = str(resolve_code_graph_path()) + else: + self.sqlite_db = str(Path(self.sqlite_db).expanduser()) return self diff --git a/code_locator_runtime.py b/code_locator_runtime.py index b72f5f3d..9a59ccae 100644 --- a/code_locator_runtime.py +++ b/code_locator_runtime.py @@ -31,21 +31,22 @@ class RepoIndexState: branch: str -def _default_cache_root() -> Path: - """Writable repo-level cache root for MCP-owned Code Locator state.""" - repo_root = os.getenv("REPO_PATH") - if repo_root: - root = Path(repo_root).resolve() / ".bicameral" - else: - root = Path(__file__).resolve().parents[2] / ".bicameral" - root.mkdir(parents=True, exist_ok=True) - return root +def ensure_runtime_env() -> None: + """Set `CODE_LOCATOR_SQLITE_DB` from the Ledger Locator if not already set (#368 Phase 2B). + Repo-local fallback (`/.bicameral/code-graph.db`) is gone — the + locator's per-project home dir is now canonical. If the locator can't + resolve (e.g. not inside a git repo), leaves the env unset; the + None-safe `code_locator.config.resolve_paths()` then handles the + direct-construction fallback. + """ + from ledger_locator import ProjectIdResolutionError, resolve_code_graph_path -def ensure_runtime_env() -> None: - """Provide repo-local defaults so the MCP server works without home-dir writes.""" - cache_root = _default_cache_root() - os.environ.setdefault("CODE_LOCATOR_SQLITE_DB", str(cache_root / "code-graph.db")) + try: + os.environ.setdefault("CODE_LOCATOR_SQLITE_DB", str(resolve_code_graph_path())) + except ProjectIdResolutionError: + # Not in a git repo — leave env to None-safe config-load fallback. + pass def _git_stdout(repo_path: str, *args: str) -> str: @@ -274,7 +275,16 @@ def ensure_index_matches_repo(repo_path: str, config) -> bool: current = get_repo_index_state(repo_path) indexed_repo = _get_meta(config.sqlite_db, "repo_path") indexed_head = _get_meta(config.sqlite_db, "head_commit") - bm25_path = Path(config.sqlite_db).parent / "bm25_index.pkl" + # #368 Phase 2B: bm25 path now comes from the locator independently of + # config.sqlite_db. Removes the implicit "bm25 lives next to sqlite_db" + # coupling — both paths come from the locator's project dir. + from ledger_locator import ProjectIdResolutionError, resolve_bm25_index_path + + try: + bm25_path = resolve_bm25_index_path(repo_path=Path(repo_path)) + except ProjectIdResolutionError: + # Not in a git repo — fall back to the legacy sibling-of-sqlite_db location. + bm25_path = Path(config.sqlite_db).parent / "bm25_index.pkl" refresh_reason = "" if not indexed_repo: diff --git a/ledger/adapter.py b/ledger/adapter.py index 0d8ed4f9..254412c7 100644 --- a/ledger/adapter.py +++ b/ledger/adapter.py @@ -156,9 +156,15 @@ def _get_branch_delta_files(authoritative_ref: str, commit_hash: str, repo_path: def _default_db_url() -> str: - db_path = Path.home() / ".bicameral" / "ledger.db" - db_path.parent.mkdir(parents=True, exist_ok=True) - return f"surrealkv://{db_path}" + """Delegate to the Ledger Locator (#368 Phase 2B). + + Kept as a single-line helper so tests can monkey-patch it directly + without reaching into ledger_locator. The locator honors SURREAL_URL + env override; the legacy `~/.bicameral/ledger.db` literal is gone. + """ + from ledger_locator import resolve_ledger_url + + return resolve_ledger_url() _STATUS_PRIORITY = {"drifted": 3, "reflected": 2, "pending": 1, "ungrounded": 0} diff --git a/tests/test_code_locator_config_none_safe.py b/tests/test_code_locator_config_none_safe.py new file mode 100644 index 00000000..989aedf6 --- /dev/null +++ b/tests/test_code_locator_config_none_safe.py @@ -0,0 +1,115 @@ +"""Phase 2B (#368): code_locator/config.py is None-safe for direct construction. + +`CodeLocatorConfig.sqlite_db` default is now `None`. `resolve_paths()` +substitutes the locator-resolved path when called with `sqlite_db is +None`, so direct-construction callers that bypass `load_config()` still +get a usable path. Env-var override and pre-set string values are +unaffected. +""" + +from __future__ import annotations + +import os +import subprocess +from pathlib import Path + +import pytest + + +@pytest.fixture +def git_repo(tmp_path: Path) -> Path: + subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True) + return tmp_path + + +@pytest.fixture(autouse=True) +def _clear_locator_env(monkeypatch): + monkeypatch.delenv("SURREAL_URL", raising=False) + monkeypatch.delenv("CODE_LOCATOR_SQLITE_DB", raising=False) + monkeypatch.delenv("BICAMERAL_LOCATOR_ALLOW_COLLISION", raising=False) + + +def test_load_config_substitutes_locator_path_when_unset(git_repo: Path, monkeypatch) -> None: + """`load_config()` end-to-end: env unset → sqlite_db resolves to the + locator's project-scoped code-graph.db path. + """ + import ledger_locator + from code_locator.config import load_config + + monkeypatch.chdir(git_repo) + expected = str(ledger_locator.resolve_code_graph_path(repo_path=git_repo)) + + config = load_config() + + assert config.sqlite_db == expected + + +def test_resolve_paths_handles_none_sqlite_db(git_repo: Path, monkeypatch) -> None: + """Direct construction of CodeLocatorConfig(sqlite_db=None) + a + `resolve_paths()` call still yields a non-None usable path. Guards + against future regressions where a caller bypasses load_config(). + """ + from code_locator.config import CodeLocatorConfig + + monkeypatch.chdir(git_repo) + config = CodeLocatorConfig(sqlite_db=None).resolve_paths() + + assert config.sqlite_db is not None + assert isinstance(config.sqlite_db, str) + assert config.sqlite_db.endswith("code-graph.db") + + +def test_env_var_still_wins_over_locator(git_repo: Path, monkeypatch, tmp_path: Path) -> None: + """Setting CODE_LOCATOR_SQLITE_DB still overrides the locator. The + env-var contract pre-dates the locator and must keep working. + """ + from code_locator.config import load_config + + explicit = str(tmp_path / "explicit.db") + monkeypatch.setenv("CODE_LOCATOR_SQLITE_DB", explicit) + monkeypatch.chdir(git_repo) + + config = load_config() + + assert config.sqlite_db == explicit + + +def test_resolve_paths_outside_git_repo_raises(tmp_path: Path, monkeypatch) -> None: + """Outside a git repo with no `CODE_LOCATOR_SQLITE_DB` override, the + locator raises `ProjectIdResolutionError`; `resolve_paths` propagates + it. Per decision:c2eqcwimhe4lpaexrddw, behavior is undefined in + unsupported environments — naming the problem (not-a-git-repo) is + strictly better than silently writing to a hardcoded parallel path + that drifts from the locator's canonical layout (and isn't Windows- + friendly to begin with). Tests that need a custom path set + `CODE_LOCATOR_SQLITE_DB`; production callers set `SURREAL_URL`. + """ + import ledger_locator + from code_locator.config import CodeLocatorConfig + + monkeypatch.chdir(tmp_path) + + with pytest.raises(ledger_locator.ProjectIdResolutionError) as exc: + CodeLocatorConfig(sqlite_db=None).resolve_paths() + + # The error message must name the actual problem (git-only assumption), + # not bury it in a stack trace. + assert "bicameral currently supports git only" in str(exc.value) + + +def test_resolve_paths_outside_git_repo_works_with_env_override( + tmp_path: Path, monkeypatch +) -> None: + """The escape hatch from the previous test: set CODE_LOCATOR_SQLITE_DB + via env (which load_config picks up) and `resolve_paths` happily uses + the explicit string without ever consulting the locator. + """ + from code_locator.config import load_config + + explicit = str(tmp_path / "explicit.db") + monkeypatch.setenv("CODE_LOCATOR_SQLITE_DB", explicit) + monkeypatch.chdir(tmp_path) # not a git repo + + config = load_config() # must NOT raise + + assert config.sqlite_db == explicit diff --git a/tests/test_code_locator_runtime_uses_locator.py b/tests/test_code_locator_runtime_uses_locator.py new file mode 100644 index 00000000..a8dac8a2 --- /dev/null +++ b/tests/test_code_locator_runtime_uses_locator.py @@ -0,0 +1,76 @@ +"""Phase 2B (#368): code_locator_runtime.ensure_runtime_env delegates to the Locator. + +Replaces the prior repo-local fallback (`/.bicameral/code-graph.db`) +with a locator-resolved per-project path. Pre-existing +`CODE_LOCATOR_SQLITE_DB` env values are preserved; non-git invocations +leave the env unset (the None-safe config-load handles the fallback). +""" + +from __future__ import annotations + +import os +import subprocess +from pathlib import Path + +import pytest + + +@pytest.fixture +def git_repo(tmp_path: Path) -> Path: + subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True) + return tmp_path + + +@pytest.fixture(autouse=True) +def _clear_locator_env(monkeypatch): + monkeypatch.delenv("SURREAL_URL", raising=False) + monkeypatch.delenv("CODE_LOCATOR_SQLITE_DB", raising=False) + monkeypatch.delenv("REPO_PATH", raising=False) + monkeypatch.delenv("BICAMERAL_LOCATOR_ALLOW_COLLISION", raising=False) + + +def test_runtime_sets_env_from_locator(git_repo: Path, monkeypatch) -> None: + """In a git repo with CODE_LOCATOR_SQLITE_DB unset, `ensure_runtime_env` + pre-populates the env from the locator's project-scoped default. + """ + import ledger_locator + from code_locator_runtime import ensure_runtime_env + + monkeypatch.chdir(git_repo) + expected = str(ledger_locator.resolve_code_graph_path(repo_path=git_repo)) + + ensure_runtime_env() + + assert os.environ.get("CODE_LOCATOR_SQLITE_DB") == expected + + +def test_runtime_respects_existing_env(git_repo: Path, monkeypatch, tmp_path: Path) -> None: + """If CODE_LOCATOR_SQLITE_DB is already set, ensure_runtime_env does + not overwrite it. setdefault semantics. + """ + from code_locator_runtime import ensure_runtime_env + + monkeypatch.chdir(git_repo) + explicit = str(tmp_path / "explicit.db") + monkeypatch.setenv("CODE_LOCATOR_SQLITE_DB", explicit) + + ensure_runtime_env() + + assert os.environ.get("CODE_LOCATOR_SQLITE_DB") == explicit + + +def test_runtime_silent_when_not_in_git(tmp_path: Path, monkeypatch) -> None: + """Outside a git repo, the locator raises ProjectIdResolutionError; + ensure_runtime_env catches and leaves the env unset rather than + crashing the MCP server boot. The None-safe config-load handles + the fallback at the next layer. + """ + from code_locator_runtime import ensure_runtime_env + + monkeypatch.chdir(tmp_path) + + # Must not raise even though tmp_path is not a git repo. + ensure_runtime_env() + + # Env stays unset for None-safe fallback downstream. + assert "CODE_LOCATOR_SQLITE_DB" not in os.environ diff --git a/tests/test_ledger_adapter_uses_locator.py b/tests/test_ledger_adapter_uses_locator.py new file mode 100644 index 00000000..f466a97d --- /dev/null +++ b/tests/test_ledger_adapter_uses_locator.py @@ -0,0 +1,77 @@ +"""Phase 2B (#368): ledger/adapter.py:_default_db_url delegates to the Ledger Locator. + +The adapter's default URL used to hard-code `~/.bicameral/ledger.db`. After +delegation, it resolves through `ledger_locator.resolve_ledger_url()` so +project-scoped paths and the SURREAL_URL env override are honored +uniformly. +""" + +from __future__ import annotations + +import subprocess +from pathlib import Path + +import pytest + + +@pytest.fixture +def git_repo(tmp_path: Path) -> Path: + subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True) + return tmp_path + + +@pytest.fixture(autouse=True) +def _clear_locator_env(monkeypatch): + monkeypatch.delenv("SURREAL_URL", raising=False) + monkeypatch.delenv("CODE_LOCATOR_SQLITE_DB", raising=False) + monkeypatch.delenv("BICAMERAL_LOCATOR_ALLOW_COLLISION", raising=False) + + +def test_default_url_comes_from_locator(git_repo: Path, monkeypatch) -> None: + """Without SURREAL_URL set, the adapter's default URL matches the locator. + + Asserts on the locator's output rather than a literal path so the test + survives layout changes in the locator. + """ + import ledger_locator + from ledger.adapter import _default_db_url + + monkeypatch.chdir(git_repo) + + locator_url = ledger_locator.resolve_ledger_url(repo_path=git_repo) + adapter_url = _default_db_url() + + assert adapter_url == locator_url + assert adapter_url.startswith("surrealkv://") + assert "/.bicameral/projects/" in adapter_url + + +def test_default_url_honors_surreal_env_override(git_repo: Path, monkeypatch) -> None: + """SURREAL_URL env-var wins over the project-scoped default. + + The adapter must respect the same override the locator does — they go + through the same code path now. + """ + from ledger.adapter import _default_db_url + + monkeypatch.chdir(git_repo) + monkeypatch.setenv("SURREAL_URL", "memory://") + + assert _default_db_url() == "memory://" + + +def test_default_url_no_legacy_home_literal(git_repo: Path, monkeypatch) -> None: + """Regression guard: the legacy `~/.bicameral/ledger.db` literal is gone. + + The adapter's default must land under `~/.bicameral/projects//`, + never the un-project-scoped `~/.bicameral/ledger.db` of v0.15.x. + """ + from ledger.adapter import _default_db_url + + monkeypatch.chdir(git_repo) + url = _default_db_url() + + # Must NOT be the legacy un-project-scoped path. + assert not url.endswith("/.bicameral/ledger.db") + # MUST be under the project-scoped projects/ dir. + assert "/.bicameral/projects/" in url From 43ce00a29458c95ea3576201303f7d9ba9e58a64 Mon Sep 17 00:00:00 2001 From: jinhongkuan Date: Mon, 18 May 2026 17:50:35 -0700 Subject: [PATCH 5/8] feat(events): delegate watermark + transcript queues to locator (Phase 2B-ii) (#368) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2B (call-site delegation), part ii: events/materializer.py and events/transcript_queue.py stop computing their state paths inline and delegate to the Ledger Locator. The watermark and pending/processed transcript queues now live under the locator's project dir — shared across worktrees of one repo, ending the v0.15.x failure modes where peer JSONL events re-replayed per-worktree and SessionEnd-hook transcripts written from worktree A were invisible to worktree B's drain loop. Delegations: - `events/materializer.EventMaterializer`: `local_dir` parameter dropped from the positional contract; replaced by an optional `repo_path` (locator-scoping) and a keyword-only `watermark_override` (tests-only escape hatch). Production callers pass `repo_path` or nothing; `watermark_override` exists so test fixtures don't have to git-init tmp_path-derived dirs. Internal mkdir on the watermark parent is preserved as belt-and-braces. - `events/transcript_queue._pending_root` / `_processed_root`: signature unchanged (still take `repo_path: str`); body delegates to `resolve_pending_transcripts_dir` / `resolve_processed_transcripts_dir`. Every caller through this module transparently moves to the project-scoped layout — no caller changes needed for downstream code. - `adapters/ledger.py:117` (team-mode wiring): pass `repo_path` to `EventMaterializer`; drop the local watermark-dir computation. - `handlers/reset.py::_replay_events_into_ledger`: use `resolve_watermark_path()` for the "{}" reset write; drop the local-dir mkdir / inline watermark path. Tests (51 passing — 28 prior + 13 transcript-queue regression): - `tests/test_session_end_queue_writer.py`: `_make_repo` now git-inits the tmp_path fixture; assertions on pending/processed paths use `_pending_root` / `_processed_root` instead of literal `/.bicameral/...` paths. The writer subprocess (cwd=repo) and CLI archiver (cwd=repo) both transparently use the locator now. - `tests/test_team_event_replay.py`, `tests/test_team_adapter_with_backend.py`, `tests/test_team_round_trip_local_folder.py`, `tests/_replay_helpers.py`: pass `watermark_override=local_dir/"watermark"` to `EventMaterializer` so fixtures keep their per-test watermark location without git-init'ing every tmp_path. ruff: clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- adapters/ledger.py | 6 ++- events/materializer.py | 46 +++++++++++++++++++--- events/transcript_queue.py | 33 +++++++++++----- handlers/reset.py | 11 ++++-- tests/_replay_helpers.py | 2 +- tests/test_session_end_queue_writer.py | 27 ++++++++++--- tests/test_team_adapter_with_backend.py | 2 +- tests/test_team_event_replay.py | 2 +- tests/test_team_round_trip_local_folder.py | 2 +- 9 files changed, 101 insertions(+), 30 deletions(-) diff --git a/adapters/ledger.py b/adapters/ledger.py index 7a063d4e..0f8713f7 100644 --- a/adapters/ledger.py +++ b/adapters/ledger.py @@ -110,11 +110,13 @@ def get_ledger(): data_path = os.getenv("BICAMERAL_DATA_PATH", repo_path) bicameral_dir = Path(data_path) / ".bicameral" events_dir = bicameral_dir / "events" - local_dir = bicameral_dir / "local" author = _get_git_email(repo_path) writer = EventFileWriter(events_dir, author) - materializer = EventMaterializer(events_dir, local_dir) + # #368 Phase 2B-ii: watermark lives at the locator-resolved + # project dir, not bicameral_dir/"local"/"watermark". The + # materializer derives the watermark path from `repo_path`. + materializer = EventMaterializer(events_dir, Path(repo_path)) cfg.setdefault("team", {})["author"] = author try: diff --git a/events/materializer.py b/events/materializer.py index 607000f7..2a138b3e 100644 --- a/events/materializer.py +++ b/events/materializer.py @@ -1,8 +1,12 @@ """EventMaterializer — replays JSONL event logs into the local ledger (v0.4.20). One file per contributor: ``.bicameral/events/{email}.jsonl``. Watermark -is a JSON ``{email: byte_offset}`` map at ``.bicameral/local/watermark``. -Replay resumes from the stored offset per author. +is a JSON ``{email: byte_offset}`` map at the locator-resolved +``~/.bicameral/projects//watermark`` (#368 Phase 2B-ii). Replay +resumes from the stored offset per author. + +Per-worktree watermark layout (pre-#368) caused peer JSONL to replay N +times across N worktrees; project-scoping fixes that. Auto-migrates legacy ``{email}/*.json`` layout (v0.4.13 – v0.4.19) on first startup, then deletes the old files. DB-level ``canonical_id`` @@ -19,10 +23,42 @@ class EventMaterializer: - def __init__(self, events_dir: Path, local_dir: Path) -> None: + def __init__( + self, + events_dir: Path, + repo_path: Path | None = None, + *, + watermark_override: Path | None = None, + ) -> None: + """#368 Phase 2B-ii: watermark path is locator-resolved by default. + + ``events_dir`` is the per-repo JSONL substrate (commit-tracked or + remote-synced). The watermark file (offset-per-author map) is + now project-scoped under the locator's project dir so worktrees + of one repo share it. + + Arguments: + events_dir: the JSONL events directory (unchanged contract). + repo_path: optional repo to scope the locator's project id + to. Defaults to ``Path.cwd()`` via the locator. + watermark_override: keyword-only test escape hatch. When + provided, used verbatim as the watermark file path and + the locator is not consulted. Production callers should + NOT pass this — it's a tests-only knob so fixtures can + operate outside a git repo without git-init'ing + tmp_path-derived dirs. + """ self._events_dir = events_dir - self._watermark_path = local_dir / "watermark" - local_dir.mkdir(parents=True, exist_ok=True) + if watermark_override is not None: + self._watermark_path = watermark_override + else: + from ledger_locator import resolve_watermark_path + + self._watermark_path = resolve_watermark_path(repo_path) + # Belt-and-braces mkdir: locator's `_resolved_project_dir` mkdir's + # the project dir via `assert_origin`, but watermark_override path + # may live anywhere (test fixtures). Idempotent. + self._watermark_path.parent.mkdir(parents=True, exist_ok=True) def _read_offsets(self) -> dict[str, int]: if not self._watermark_path.exists(): diff --git a/events/transcript_queue.py b/events/transcript_queue.py index 78bab782..39b7be62 100644 --- a/events/transcript_queue.py +++ b/events/transcript_queue.py @@ -1,12 +1,17 @@ """Pending-transcripts queue (#156). -The SessionEnd hook copies the parent session's transcript to -``/.bicameral/pending-transcripts/.jsonl``. The next -session's preflight Step 3.5 reads the queue FIFO, surfaces corrections -as ask-findings, then archives processed files to -``/.bicameral/processed-transcripts/``. - -This module is the single source of truth for queue layout. Future +The SessionEnd hook copies the parent session's transcript to the +locator-resolved pending dir. The next session's preflight Step 3.5 +reads the queue FIFO, surfaces corrections as ask-findings, then +archives processed files to the processed dir. + +#368 Phase 2B-ii: queue directories now live under +``~/.bicameral/projects//{pending,processed}-transcripts/`` (project- +scoped, shared across worktrees) — not per-worktree under +``/.bicameral/`` as in v0.15.x. Worktree A's transcript is now +visible to worktree B's drain loop. + +This module remains the single source of truth for queue layout. Future team-server config may override retention and merge policy by reading this module's defaults. """ @@ -20,11 +25,21 @@ def _pending_root(repo_path: str) -> Path: - return Path(repo_path) / ".bicameral" / PENDING_DIR + """Resolve the project-scoped pending-transcripts dir (#368 Phase 2B-ii). + + Delegates to ``ledger_locator.resolve_pending_transcripts_dir``. The + function signature stays so existing call sites remain unchanged. + """ + from ledger_locator import resolve_pending_transcripts_dir + + return resolve_pending_transcripts_dir(Path(repo_path)) def _processed_root(repo_path: str) -> Path: - return Path(repo_path) / ".bicameral" / PROCESSED_DIR + """Resolve the project-scoped processed-transcripts dir (#368 Phase 2B-ii).""" + from ledger_locator import resolve_processed_transcripts_dir + + return resolve_processed_transcripts_dir(Path(repo_path)) def write_pending(repo_path: str, session_id: str, transcript_path: str) -> Path | None: diff --git a/handlers/reset.py b/handlers/reset.py index a05da9ee..e02b3dc4 100644 --- a/handlers/reset.py +++ b/handlers/reset.py @@ -299,9 +299,12 @@ async def _replay_events_into_ledger(ledger) -> int: if events_dir is None: return 0 - local_dir = events_dir.parent / "local" - watermark_path = local_dir / "watermark" - local_dir.mkdir(parents=True, exist_ok=True) + # #368 Phase 2B-ii: watermark lives at the locator-resolved project + # dir. The locator's `_resolved_project_dir` mkdir's parent on first + # use via `assert_origin`, so no explicit mkdir needed here. + from ledger_locator import resolve_watermark_path + + watermark_path = resolve_watermark_path() # Reset the watermark so every event replays from offset 0. The # materializer's offset map is `{author: byte_offset}`; writing an # empty object is the canonical "start over" signal. @@ -309,7 +312,7 @@ async def _replay_events_into_ledger(ledger) -> int: from events.materializer import EventMaterializer - materializer = EventMaterializer(events_dir, local_dir) + materializer = EventMaterializer(events_dir) return await materializer.replay_new_events(inner) diff --git a/tests/_replay_helpers.py b/tests/_replay_helpers.py index 7ef66c23..a6fa2ca3 100644 --- a/tests/_replay_helpers.py +++ b/tests/_replay_helpers.py @@ -264,7 +264,7 @@ async def replay_substrate( path = events_dir / f"{author}.jsonl" with open(path, "ab") as f: f.write(build_event_log(events, author)) - materializer = EventMaterializer(events_dir, local_dir) + materializer = EventMaterializer(events_dir, watermark_override=local_dir / "watermark") return await materializer.replay_new_events(adapter) diff --git a/tests/test_session_end_queue_writer.py b/tests/test_session_end_queue_writer.py index 4a781b79..35fad24d 100644 --- a/tests/test_session_end_queue_writer.py +++ b/tests/test_session_end_queue_writer.py @@ -41,6 +41,10 @@ def _run_writer(stdin_payload: str | bytes, cwd: Path) -> subprocess.CompletedPr def _make_repo(tmp_path: Path, with_bicameral: bool = True) -> Path: repo = tmp_path / "repo" repo.mkdir() + # #368 Phase 2B-ii: transcript_queue delegates to the locator which + # requires a git repo. Init one so the locator can resolve the + # project-scoped pending-transcripts dir. + subprocess.run(["git", "init", "-q"], cwd=repo, check=True) if with_bicameral: (repo / ".bicameral").mkdir() return repo @@ -53,6 +57,8 @@ def _make_transcript(tmp_path: Path, content: str = '{"type":"user","text":"hi"} def test_writer_copies_transcript_to_pending_dir(tmp_path: Path) -> None: + from events.transcript_queue import _pending_root + repo = _make_repo(tmp_path) src = _make_transcript(tmp_path, content='{"a":1}\n{"b":2}\n') payload = json.dumps({"session_id": "abc", "transcript_path": str(src), "cwd": str(repo)}) @@ -60,7 +66,7 @@ def test_writer_copies_transcript_to_pending_dir(tmp_path: Path) -> None: result = _run_writer(payload, cwd=repo) assert result.returncode == 0 - dst = repo / ".bicameral" / "pending-transcripts" / "abc.jsonl" + dst = _pending_root(str(repo)) / "abc.jsonl" assert dst.is_file() assert dst.read_bytes() == src.read_bytes() @@ -77,6 +83,8 @@ def test_writer_no_op_when_no_bicameral_dir(tmp_path: Path) -> None: def test_writer_no_op_when_transcript_missing(tmp_path: Path) -> None: + from events.transcript_queue import _pending_root + repo = _make_repo(tmp_path) payload = json.dumps( {"session_id": "x", "transcript_path": str(tmp_path / "nope.jsonl"), "cwd": str(repo)} @@ -85,21 +93,25 @@ def test_writer_no_op_when_transcript_missing(tmp_path: Path) -> None: result = _run_writer(payload, cwd=repo) assert result.returncode == 0 - pending = repo / ".bicameral" / "pending-transcripts" + pending = _pending_root(str(repo)) assert not pending.exists() or list(pending.iterdir()) == [] def test_writer_handles_malformed_stdin(tmp_path: Path) -> None: + from events.transcript_queue import _pending_root + repo = _make_repo(tmp_path) result = _run_writer("not json at all", cwd=repo) assert result.returncode == 0 - pending = repo / ".bicameral" / "pending-transcripts" + pending = _pending_root(str(repo)) assert not pending.exists() or list(pending.iterdir()) == [] def test_writer_uses_uuid_when_session_id_missing(tmp_path: Path) -> None: + from events.transcript_queue import _pending_root + repo = _make_repo(tmp_path) src = _make_transcript(tmp_path) payload = json.dumps({"transcript_path": str(src), "cwd": str(repo)}) @@ -107,7 +119,7 @@ def test_writer_uses_uuid_when_session_id_missing(tmp_path: Path) -> None: result = _run_writer(payload, cwd=repo) assert result.returncode == 0 - pending = repo / ".bicameral" / "pending-transcripts" + pending = _pending_root(str(repo)) files = list(pending.iterdir()) assert len(files) == 1 uuid_re = re.compile(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.jsonl$") @@ -146,7 +158,8 @@ def test_archive_processed_moves_pending_to_processed(tmp_path: Path) -> None: assert dst.is_file() assert dst.name == "sess1.jsonl" assert dst.read_text(encoding="utf-8") == "payload-A" - assert dst.parent == repo / ".bicameral" / "processed-transcripts" + from events.transcript_queue import _processed_root + assert dst.parent == _processed_root(str(repo)) def test_archive_processed_idempotent_on_replay(tmp_path: Path) -> None: @@ -184,9 +197,11 @@ def test_transcript_archive_invokes_archive_processed(tmp_path: Path) -> None: timeout=15, ) + from events.transcript_queue import _processed_root + assert happy.returncode == 0, happy.stderr.decode("utf-8", errors="replace") assert not pending.exists() - archived = repo / ".bicameral" / "processed-transcripts" / "sess-cli.jsonl" + archived = _processed_root(str(repo)) / "sess-cli.jsonl" assert archived.is_file() assert archived.read_text(encoding="utf-8") == "archived-content" diff --git a/tests/test_team_adapter_with_backend.py b/tests/test_team_adapter_with_backend.py index ff97167a..70e16b39 100644 --- a/tests/test_team_adapter_with_backend.py +++ b/tests/test_team_adapter_with_backend.py @@ -71,7 +71,7 @@ def _build( ) -> tuple[TeamWriteAdapter, SurrealDBLedgerAdapter]: inner = SurrealDBLedgerAdapter(url="memory://") writer = EventFileWriter(events_dir, author) - materializer = EventMaterializer(events_dir, local_dir) + materializer = EventMaterializer(events_dir, watermark_override=local_dir / "watermark") return TeamWriteAdapter(inner, writer, materializer, backend=backend), inner diff --git a/tests/test_team_event_replay.py b/tests/test_team_event_replay.py index b2adaf6a..5c97ceb2 100644 --- a/tests/test_team_event_replay.py +++ b/tests/test_team_event_replay.py @@ -38,7 +38,7 @@ def _build_team_adapter( """Wire up an in-memory inner adapter + JSONL event log + materializer.""" inner = SurrealDBLedgerAdapter(url="memory://") writer = EventFileWriter(events_dir, author) - materializer = EventMaterializer(events_dir, local_dir) + materializer = EventMaterializer(events_dir, watermark_override=local_dir / "watermark") return TeamWriteAdapter(inner, writer, materializer), inner diff --git a/tests/test_team_round_trip_local_folder.py b/tests/test_team_round_trip_local_folder.py index 0bd7eacf..9f2e9235 100644 --- a/tests/test_team_round_trip_local_folder.py +++ b/tests/test_team_round_trip_local_folder.py @@ -40,7 +40,7 @@ def _payload(intent: str, source_ref: str) -> dict: def _build(events_dir: Path, local_dir: Path, author: str, remote_root: Path) -> TeamWriteAdapter: inner = SurrealDBLedgerAdapter(url="memory://") writer = EventFileWriter(events_dir, author) - materializer = EventMaterializer(events_dir, local_dir) + materializer = EventMaterializer(events_dir, watermark_override=local_dir / "watermark") backend = LocalFolderAdapter(remote_root=remote_root, author=author) return TeamWriteAdapter(inner, writer, materializer, backend=backend) From f7853e20b70f469614d64467fb85932af88b9099 Mon Sep 17 00:00:00 2001 From: jinhongkuan Date: Mon, 18 May 2026 19:46:05 -0700 Subject: [PATCH 6/8] feat(setup_wizard): R4 config split + git-native onboarding (Phase 2C, #368) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the operator-facing pieces of the R4 ledger-locator amendment: - `_build_config` no longer writes SURREAL_URL / CODE_LOCATOR_SQLITE_DB to .mcp.json — the locator picks both up at runtime (decision:5nr66wvmapjpt58rrji8). - `context._CONFIG_KEY_ROUTING` is the single source of truth partitioning team-identity keys (mode, team.backend/folder/remote_root, ingest_max_bytes) from per-operator keys (telemetry, channel, guided, signer_email_fallback, render_source_attribution, team.role, ingest_rate_limit_*, query_timeout_*). All 10 context.py readers route per-key via `_config_path_for_key`; falls back to `/.bicameral/ config.yaml` when the locator can't resolve a project id (non-git tmpdir → preserves v0.15.x behavior for legacy tests). - `_write_collaboration_config` writes BOTH files atomically (operator first → config second, both via temp+rename). On rename failure of the second file, the just-renamed operator file is rolled back and both temps unlinked — neither destination ends up half-written. Accepts a test-only `operator_path` override; falls back to single-file legacy layout when the locator can't resolve. - `run_setup` detects committed team/solo config via `git show HEAD:.bicameral/config.yaml` and auto-joins; falls through to the prompt flow on no-commit / non-team / parse failure (decision:ew9rgegdlblexsraesss). Divergence guard: when HEAD lacks the file but the default branch (via `_resolve_authoritative_ref`) has it, prompt the operator to merge-first before persisting a fresh setup (decision:ogdfx014sqgc6fi6ky1a). - `run_config_wizard` reads from both files via the locator, tags each prompt with `[team]` or `[your machine]`, writes back via the same atomic two-file split as `_write_collaboration_config`. Tests (14 new, 0 broken): - test_setup_wizard_omits_state_env_vars.py — 3 tests - test_config_split.py — 5 tests (incl. rollback-on-failure + routing-table-covers-every-key + reads-route-per-key) - test_setup_wizard_git_native.py — 6 tests (team/solo auto-join, divergence guard, _read_committed_config unit coverage) - test_run_config_wizard.py — 2 tests (reads from both, writes to routed) 125 existing tests across setup_wizard + locator + context pass. ruff clean across the diff. Co-Authored-By: Claude Opus 4.7 (1M context) --- context.py | 94 ++++- setup_wizard.py | 386 +++++++++++++----- tests/test_config_split.py | 165 ++++++++ tests/test_run_config_wizard.py | 197 +++++++++ tests/test_setup_wizard_git_native.py | 158 +++++++ .../test_setup_wizard_omits_state_env_vars.py | 33 ++ 6 files changed, 924 insertions(+), 109 deletions(-) create mode 100644 tests/test_config_split.py create mode 100644 tests/test_run_config_wizard.py create mode 100644 tests/test_setup_wizard_git_native.py create mode 100644 tests/test_setup_wizard_omits_state_env_vars.py diff --git a/context.py b/context.py index 6ce1dd5c..1a35fcf2 100644 --- a/context.py +++ b/context.py @@ -6,10 +6,69 @@ import uuid from dataclasses import dataclass, field from pathlib import Path +from typing import Literal # Generated once per server process — all tool calls in the same session share it. _SESSION_ID: str = str(uuid.uuid4()) +# #368 R4 config split (decision:5nr66wvmapjpt58rrji8): keys partition into +# team-identity (committed to git at ``/.bicameral/config.yaml``) vs +# per-operator (private at ``~/.bicameral/projects//operator.yaml``). +# Single source of truth for the wizard's writer, the context.py readers, +# the run-config-wizard editor, and migrate-state's partitioner. +# +# When adding a new key: add it here too. ``test_config_split.py:: +# test_routing_table_covers_every_key`` catches drift. +_CONFIG_KEY_ROUTING: dict[str, Literal["team", "operator"]] = { + # team-identity (commit-able) + "mode": "team", + "team.backend": "team", + "team.folder_id": "team", + "team.remote_root": "team", + "ingest_max_bytes": "team", + "signer_email_fallback_min_strength": "team", + # per-operator (private; lives under ~/.bicameral/projects//) + "telemetry": "operator", + "channel": "operator", + "guided": "operator", + "signer_email_fallback": "operator", + "render_source_attribution": "operator", + "team.author": "operator", + "team.role": "operator", + "ingest_rate_limit_burst": "operator", + "ingest_rate_limit_refill_per_sec": "operator", + "query_timeout_read_seconds": "operator", + "query_timeout_drift_seconds": "operator", +} + + +def _config_path_for_key(repo_path: str, key: str) -> Path: + """Return the config file path that owns ``key`` under the R4 split. + + Unknown keys default to the team config (commit-able). The wizard's + writer raises on unknown keys; this reader is permissive — an old + server reading a forward-compat config should not crash, it should + fall through to the per-key default. + + For operator-routed keys, the locator is consulted. If the locator + can't resolve a project id (non-git tmpdir in a test, no git binary), + we fall back to the team config path — that preserves the v0.15.x + behavior where every key lived in ``.bicameral/config.yaml``, so + existing tests that don't git-init keep passing. Collision errors + are NOT swallowed; the locator's whole point is to surface those. + """ + routing = _CONFIG_KEY_ROUTING.get(key, "team") + if routing == "operator": + try: + from ledger_locator import ProjectIdResolutionError, resolve_operator_config_path + except ImportError: + return Path(repo_path) / ".bicameral" / "config.yaml" + try: + return resolve_operator_config_path(Path(repo_path)) + except ProjectIdResolutionError: + return Path(repo_path) / ".bicameral" / "config.yaml" + return Path(repo_path) / ".bicameral" / "config.yaml" + _GUIDED_MODE_TRUTHY = frozenset({"1", "true", "yes", "on"}) _GUIDED_MODE_FALSY = frozenset({"0", "false", "no", "off", ""}) @@ -64,11 +123,13 @@ def _read_yaml_string_field(repo_path: str, key: str, valid: frozenset[str], default: str) -> str: - """Generic reader for a `.bicameral/config.yaml` string field with a - fixed valid-set and fail-soft default. Returns the raw config value - if it's in the valid set; falls back to default on missing file, - malformed yaml, missing key, or invalid value.""" - config_path = Path(repo_path) / ".bicameral" / "config.yaml" + """Generic reader for a config-yaml string field with a fixed valid-set + and fail-soft default. Reads from the file owned by ``key`` under the + R4 routing table (``/.bicameral/config.yaml`` for team-identity + keys, ``~/.bicameral/projects//operator.yaml`` for per-operator + keys). Falls back to default on missing file, malformed yaml, missing + key, or invalid value.""" + config_path = _config_path_for_key(repo_path, key) if not config_path.exists(): return default try: @@ -162,14 +223,14 @@ def _resolve_agent_identity(repo_path: str) -> str: def _read_ingest_max_bytes(repo_path: str) -> int: - """Resolve ``ingest_max_bytes`` from ``.bicameral/config.yaml``. + """Resolve ``ingest_max_bytes`` from the team-identity config. Default 1 MiB. Clamped to ``[_INGEST_MAX_BYTES_MIN, _INGEST_MAX_BYTES_MAX]``; out-of-range values (negative, non-integer, beyond clamp) fall back to the default with no silent acceptance. Read by ``handlers.ingest._check_payload_size``. """ - config_path = Path(repo_path) / ".bicameral" / "config.yaml" + config_path = _config_path_for_key(repo_path, "ingest_max_bytes") if not config_path.exists(): return _DEFAULT_INGEST_MAX_BYTES try: @@ -187,13 +248,13 @@ def _read_ingest_max_bytes(repo_path: str) -> int: def _read_ingest_rate_limit_burst(repo_path: str) -> int: - """Resolve ``ingest_rate_limit_burst`` from ``.bicameral/config.yaml``. + """Resolve ``ingest_rate_limit_burst`` from the per-operator config. Default 10. Clamped to ``[_INGEST_RATE_LIMIT_BURST_MIN, _INGEST_RATE_LIMIT_BURST_MAX]``. Out-of-range / non-int / malformed yaml all fall back to default (no silent acceptance). """ - config_path = Path(repo_path) / ".bicameral" / "config.yaml" + config_path = _config_path_for_key(repo_path, "ingest_rate_limit_burst") if not config_path.exists(): return _DEFAULT_INGEST_RATE_LIMIT_BURST try: @@ -211,7 +272,7 @@ def _read_ingest_rate_limit_burst(repo_path: str) -> int: def _read_ingest_rate_limit_refill_per_sec(repo_path: str) -> float: - """Resolve ``ingest_rate_limit_refill_per_sec`` from ``.bicameral/config.yaml``. + """Resolve ``ingest_rate_limit_refill_per_sec`` from the per-operator config. Default 1.0. Clamped to ``[_INGEST_RATE_LIMIT_REFILL_MIN, _INGEST_RATE_LIMIT_REFILL_MAX]``. ``0.0`` would lock the bucket @@ -219,7 +280,7 @@ def _read_ingest_rate_limit_refill_per_sec(repo_path: str) -> float: to default. Out-of-range / non-numeric / malformed yaml all fall back to default. """ - config_path = Path(repo_path) / ".bicameral" / "config.yaml" + config_path = _config_path_for_key(repo_path, "ingest_rate_limit_refill_per_sec") if not config_path.exists(): return _DEFAULT_INGEST_RATE_LIMIT_REFILL_PER_SEC try: @@ -256,7 +317,7 @@ def _read_query_timeout_seconds( min_val: float, max_val: float, ) -> float: - """Resolve a query-timeout-seconds field from ``.bicameral/config.yaml``. + """Resolve a query-timeout-seconds field from the config file owned by ``key``. Out-of-range values are **clamped** to ``[min_val, max_val]`` (preserve operator intent for "long but bounded" — silently substituting the @@ -264,7 +325,7 @@ def _read_query_timeout_seconds( non-numeric / bool / malformed-yaml all fall back to the documented default (those aren't operator intent; they're config errors). """ - config_path = Path(repo_path) / ".bicameral" / "config.yaml" + config_path = _config_path_for_key(repo_path, key) if not config_path.exists(): return default try: @@ -321,8 +382,9 @@ def _read_guided_mode(repo_path: str) -> bool: Precedence: 1. ``BICAMERAL_GUIDED_MODE`` env var (truthy / falsy) — one-off override - 2. ``guided: true/false`` in ``/.bicameral/config.yaml`` — durable - setting chosen at ``bicameral setup`` time + 2. ``guided: true/false`` in the per-operator config (R4 split — lives + at ``~/.bicameral/projects//operator.yaml``) — durable setting + chosen at ``bicameral setup`` time 3. Default: ``False`` (normal mode — action hints still fire, but as non-blocking advisories) """ @@ -332,7 +394,7 @@ def _read_guided_mode(repo_path: str) -> bool: if env_val in _GUIDED_MODE_FALSY and env_val != "": return False - config_path = Path(repo_path) / ".bicameral" / "config.yaml" + config_path = _config_path_for_key(repo_path, "guided") if not config_path.exists(): return False try: diff --git a/setup_wizard.py b/setup_wizard.py index a40cfc2f..aba81d07 100644 --- a/setup_wizard.py +++ b/setup_wizard.py @@ -356,6 +356,44 @@ def _resolve_authoritative_ref(repo_path: Path) -> tuple[str, bool]: return branch, needs_override +def _read_committed_config(repo_path: Path, ref: str = "HEAD") -> dict | None: + """Return the parsed YAML at ``ref:.bicameral/config.yaml`` via ``git show``. + + R4 git-native onboarding (decision:ew9rgegdlblexsraesss): the wizard + detects whether team-mode is already configured for this clone by + asking git for the committed config, not by walking the filesystem. + Works uniformly across worktrees, submodules, bare-repo deployments, + sparse checkouts, devcontainers, Codespaces, CI runners, and + ``--separate-git-dir`` layouts — none of which a filesystem-topology + probe could classify correctly. + + Returns the parsed config dict on success, ``None`` on any failure + (no commit yet, file not tracked at ``ref``, malformed YAML, git + binary missing). The caller should treat ``None`` as "no committed + config — proceed with the prompt flow." + """ + try: + result = subprocess.run( + ["git", "show", f"{ref}:.bicameral/config.yaml"], + cwd=repo_path, + capture_output=True, + text=True, + ) + except (FileNotFoundError, OSError): + return None + if result.returncode != 0 or not result.stdout.strip(): + return None + try: + import yaml + + parsed = yaml.safe_load(result.stdout) + except Exception: + return None + if not isinstance(parsed, dict): + return None + return parsed + + def _detect_agents() -> list[str]: """Auto-detect which coding agents are available.""" found = [] @@ -448,8 +486,10 @@ def _build_config( Keeping data_path separate lets history live in a private parent repo while REPO_PATH points to the public code repo. - In team mode, local DBs go under .bicameral/local/ (gitignored) - so they don't leak into the tracked events directory. + The ledger and code-graph paths are NOT written here — `ledger_locator` + resolves both at runtime to `~/.bicameral/projects//` (#368). Users + who need a non-default path set `SURREAL_URL` / `CODE_LOCATOR_SQLITE_DB` + in their own environment. authoritative_ref: when set, written into the config env as ``BICAMERAL_AUTHORITATIVE_REF`` so the runtime detector pins the @@ -461,16 +501,8 @@ def _build_config( data_root = (data_path or repo_path) / ".bicameral" data_root.mkdir(parents=True, exist_ok=True) - if mode == "team": - local_dir = data_root / "local" - local_dir.mkdir(parents=True, exist_ok=True) - else: - local_dir = data_root - env: dict[str, str] = { "REPO_PATH": str(repo_path), - "SURREAL_URL": f"surrealkv://{local_dir / 'ledger.db'}", - "CODE_LOCATOR_SQLITE_DB": str(local_dir / "code-graph.db"), "BICAMERAL_TELEMETRY": "1" if telemetry else "0", } if data_path is not None and data_path.resolve() != repo_path.resolve(): @@ -1532,45 +1564,123 @@ def _write_collaboration_config( telemetry: bool = False, team_backend: dict | None = None, channel: str | None = None, + operator_path: Path | None = None, ) -> None: - """Write .bicameral/config.yaml with collaboration mode, guided-mode, telemetry, - signer-email fallback, and (optionally) the team-backend block. - - `signer_email_fallback` (#200 Phase 2) defaults to `local-part-only` — - privacy-positive: preserves attribution prefix on session-originated - ingests without leaking the full git user.email to the ledger / team- - mode JSONL substrate. Modes: `redact` (strongest, no attribution), - `local-part-only` (default), `full` (legacy verbatim email). - - `team_backend` (#277): when present, persists `team:` block with - `backend`, `role`, and either `folder_id` (Drive) or `remote_root` - (LocalFolder). - - `channel`: release channel for ``bicameral.update``. Defaults to - auto-detect via ``_detect_install_channel()`` — a ``.devN`` install - writes ``channel: nightly``, anything else writes ``channel: stable``. - Tests pass an explicit value to lock behavior. + """Write the team-identity and per-operator config files (R4 split, #368). + + Two destinations under the R4 split (decision:5nr66wvmapjpt58rrji8): + + - ``/.bicameral/config.yaml`` — team-identity keys: + ``mode``, the ``team.backend``/``team.folder_id``/``team.remote_root`` + block. Committed to git so teammates inherit on clone. + - ``operator_path`` (default: ``ledger_locator.resolve_operator_config_path(data_path)``, + which lives under ``~/.bicameral/projects//``) — per-operator + keys: ``guided``, ``telemetry``, ``channel``, + ``signer_email_fallback``, ``render_source_attribution``, + ``team.role``. Per-machine, never committed. + + Atomic two-file write: both files are first written to ``.tmp``, + then renamed in sequence (operator first → config second). If the + second rename fails, the newly-renamed operator file is unlinked and + any remaining temp files are cleaned up before re-raising — neither + destination ends up in a half-written state. + + Fallback: when ``operator_path`` is None and ``data_path`` isn't + inside a git work tree (locator can't resolve a project id), every + key is written to the single team config.yaml — the v0.15.x layout. + This preserves behavior for pre-R4 tests and non-git scratch dirs; + real ``run_setup`` always runs in a git repo so the split fires. + + ``signer_email_fallback`` (#200 Phase 2) defaults to ``local-part-only`` + (privacy-positive). ``team_backend`` (#277): the ``backend`` / + ``folder_id`` / ``remote_root`` keys land in the team file; ``role`` + lands in the operator file. ``channel`` defaults to auto-detect via + ``_detect_install_channel()``. """ resolved_channel = channel if channel is not None else _detect_install_channel() config_path = data_path / ".bicameral" / "config.yaml" config_path.parent.mkdir(parents=True, exist_ok=True) - base = ( - "# Bicameral configuration\n" - f"mode: {mode}\n" - f"guided: {'true' if guided else 'false'}\n" - f"telemetry: {'true' if telemetry else 'false'}\n" - f"channel: {resolved_channel}\n" - "signer_email_fallback: local-part-only\n" - "render_source_attribution: redacted\n" # #209: privacy-positive default - ) + + resolved_operator_path = operator_path + if resolved_operator_path is None: + try: + from ledger_locator import ( + ProjectIdResolutionError, + resolve_operator_config_path, + ) + + resolved_operator_path = resolve_operator_config_path(data_path) + except (ImportError, ProjectIdResolutionError): + resolved_operator_path = None # fall through to single-file legacy layout + + # Team-identity body (always written to /.bicameral/config.yaml). + team_body = "# Bicameral configuration (team identity — committed)\n" + team_body += f"mode: {mode}\n" if team_backend: team_lines = ["team:"] - for key in ("backend", "folder_id", "remote_root", "role"): + for key in ("backend", "folder_id", "remote_root"): if key in team_backend: - value = team_backend[key] - team_lines.append(f" {key}: {value}") - base += "\n".join(team_lines) + "\n" - config_path.write_text(base, encoding="utf-8") + team_lines.append(f" {key}: {team_backend[key]}") + team_body += "\n".join(team_lines) + "\n" + + # Per-operator body (written to operator.yaml under the project state dir). + operator_body = "# Bicameral operator configuration (per-machine — private)\n" + operator_body += f"guided: {'true' if guided else 'false'}\n" + operator_body += f"telemetry: {'true' if telemetry else 'false'}\n" + operator_body += f"channel: {resolved_channel}\n" + operator_body += "signer_email_fallback: local-part-only\n" + operator_body += "render_source_attribution: redacted\n" # #209: privacy-positive default + if team_backend and "role" in team_backend: + operator_body += "team:\n" + operator_body += f" role: {team_backend['role']}\n" + + if resolved_operator_path is None: + # Non-git fallback: collapse to single config.yaml. Keep the legacy + # body shape so v0.15.x readers (and old tests) keep working. + legacy_body = ( + "# Bicameral configuration\n" + f"mode: {mode}\n" + f"guided: {'true' if guided else 'false'}\n" + f"telemetry: {'true' if telemetry else 'false'}\n" + f"channel: {resolved_channel}\n" + "signer_email_fallback: local-part-only\n" + "render_source_attribution: redacted\n" + ) + if team_backend: + team_lines = ["team:"] + for key in ("backend", "folder_id", "remote_root", "role"): + if key in team_backend: + team_lines.append(f" {key}: {team_backend[key]}") + legacy_body += "\n".join(team_lines) + "\n" + config_path.write_text(legacy_body, encoding="utf-8") + else: + resolved_operator_path.parent.mkdir(parents=True, exist_ok=True) + config_tmp = config_path.with_suffix(config_path.suffix + ".tmp") + operator_tmp = resolved_operator_path.with_suffix( + resolved_operator_path.suffix + ".tmp" + ) + config_tmp.write_text(team_body, encoding="utf-8") + operator_tmp.write_text(operator_body, encoding="utf-8") + operator_promoted = False + try: + operator_tmp.replace(resolved_operator_path) + operator_promoted = True + config_tmp.replace(config_path) + except Exception: + # Roll back any partial state: unlink the freshly renamed + # operator.yaml (if step 1 succeeded) and any leftover temps. + if operator_promoted: + try: + resolved_operator_path.unlink() + except FileNotFoundError: + pass + for tmp in (config_tmp, operator_tmp): + try: + tmp.unlink() + except FileNotFoundError: + pass + raise + print(f" Collaboration: {mode} mode") print(f" Guided mode: {'on — blocking hints' if guided else 'off — advisory hints'}") print(f" Telemetry: {'on — anonymous usage stats' if telemetry else 'off'}") @@ -1588,6 +1698,9 @@ def _write_collaboration_config( "in config.yaml to change)" ) print(" Preflight bypass tracking: enabled (writes ~/.bicameral/preflight_events.jsonl)") + if resolved_operator_path is not None: + print(f" team config → {config_path} (commit this)") + print(f" your settings → {resolved_operator_path} (private)") def _patch_gitignore(path: Path, entries: list[str], comment: str) -> None: @@ -1725,15 +1838,56 @@ def run_setup( return 1 # Step 4: Collaboration mode + guided intensity + telemetry + gitignore - collab_mode = _select_collaboration_mode() + # R4 git-native onboarding (decision:ew9rgegdlblexsraesss): if HEAD + # already has a committed .bicameral/config.yaml, inherit its mode + + # team backend instead of prompting. Falls through to the interactive + # path on no-commit / no-config / non-team / parse failure. + committed = _read_committed_config(repo_path, "HEAD") + collab_mode: str + team_backend: dict | None = None + if committed is not None and committed.get("mode") in ("team", "solo"): + collab_mode = committed["mode"] + if collab_mode == "team" and isinstance(committed.get("team"), dict): + team_block = committed["team"] + backend = team_block.get("backend") + target = team_block.get("folder_id") or team_block.get("remote_root") + print( + f" Detected team config: backend={backend}, " + f"folder={target} ✓ (auto-joining)" + ) + team_backend = {k: v for k, v in team_block.items() if v is not None} + else: + print(f" Detected committed config: mode={collab_mode} ✓ (auto-inheriting)") + else: + # R4 divergence guard (decision:ogdfx014sqgc6fi6ky1a): HEAD has no + # committed config — is it living on a feature branch where the + # default branch DID commit one? Surface the inheritance gap + # before we let the operator persist a fresh-setup config. + default_branch = auth_ref + default_committed = _read_committed_config(repo_path, default_branch) + if default_committed is not None and default_committed.get("mode") in ("team", "solo"): + print() + print( + f" Your branch doesn't have .bicameral/config.yaml, but {default_branch} does." + ) + if not _prompt_yes_no( + " Continue with fresh setup? (Choosing No exits so you can merge first.)", + default=False, + ): + print( + f" Aborted. Run `git merge {default_branch}` to inherit the team config, " + f"then re-run `bicameral-mcp setup`." + ) + return 1 + + collab_mode = _select_collaboration_mode() + if collab_mode == "team": + # #277: Create vs Join vs LocalFolder dispatch for the shared ledger. + team_backend = _select_team_backend(repo_path) + guided = _select_guided_mode() telemetry = _select_telemetry() - team_backend: dict | None = None - if collab_mode == "team": - # #277: Create vs Join vs LocalFolder dispatch for the shared ledger. - team_backend = _select_team_backend(repo_path) - _write_collaboration_config( data_path, collab_mode, @@ -1813,20 +1967,53 @@ def run_setup( return 0 -def run_config_wizard() -> int: - """Interactive CLI wizard for editing bicameral config.yaml. +def _resolve_config_paths(repo_path: Path) -> tuple[Path, Path | None]: + """Resolve (team_config_path, operator_config_path) under the R4 split. - Reads the current config, prompts for each setting via questionary, - writes updated config.yaml, and reinstalls skills/hooks so changes - take effect immediately. + Returns the team config path always (``/.bicameral/config.yaml``) + and the operator config path when the locator can resolve a project id + for ``repo_path``. When the locator can't resolve (non-git scratch + dir), operator_path is None — callers should fall through to the + legacy single-file v0.15.x behavior. """ - import subprocess - import sys + team_path = repo_path / ".bicameral" / "config.yaml" + try: + from ledger_locator import ( + ProjectIdResolutionError, + resolve_operator_config_path, + ) + + return team_path, resolve_operator_config_path(repo_path) + except (ImportError, ProjectIdResolutionError): + return team_path, None + +def _read_yaml_dict(path: Path) -> dict: + if not path.exists(): + return {} try: import yaml - except ImportError: - import json as yaml # fallback: won't write yaml but will read + + return yaml.safe_load(path.read_text(encoding="utf-8")) or {} + except Exception: + return {} + + +def run_config_wizard() -> int: + """Interactive CLI wizard for editing bicameral config under the R4 split. + + Two source files (decision:5nr66wvmapjpt58rrji8): + - ``/.bicameral/config.yaml`` — team-identity keys (committed) + - ``~/.bicameral/projects//operator.yaml`` — per-operator keys + + Each prompt is tagged ``[team]`` or ``[your machine]`` so the operator + sees which file their answer lands in before they pick. Writes go + through ``_write_collaboration_config`` so both files land atomically + or roll back together. Skills/hooks are reinstalled after a successful + write so changes take effect immediately. + """ + import subprocess + import sys _ensure_utf8_stdout() print() @@ -1836,47 +2023,60 @@ def run_config_wizard() -> int: print() repo_path = _detect_repo() - config_path = repo_path / ".bicameral" / "config.yaml" - - # Read current values - if config_path.exists(): - try: - cfg = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} - except Exception: - cfg = {} - else: - cfg = {} - - cur_mode = cfg.get("mode", "team") - cur_guided = cfg.get("guided", True) - cur_telemetry = cfg.get("telemetry", True) - # Preserve the channel field across the config-wizard rewrite. Without - # this, re-running `bicameral-mcp config` after the user opted into - # nightly would silently drop `channel: nightly` and the rewrite would - # default to stable — re-stranding nightly installs (the exact bug the + team_path, operator_path = _resolve_config_paths(repo_path) + + team_cfg = _read_yaml_dict(team_path) + operator_cfg = _read_yaml_dict(operator_path) if operator_path else {} + + cur_mode = team_cfg.get("mode", "team") + # Per-operator settings come from operator.yaml under R4; pre-R4 repos + # may still have them in config.yaml — fall back so re-running config + # on an unmigrated repo doesn't silently drop the operator's prior choices. + cur_guided = operator_cfg.get("guided", team_cfg.get("guided", True)) + cur_telemetry = operator_cfg.get("telemetry", team_cfg.get("telemetry", True)) + # Preserve the channel field across the rewrite. Without this, + # re-running `bicameral-mcp config` after the user opted into nightly + # would silently drop `channel: nightly` and the rewrite would default + # to stable — re-stranding nightly installs (the exact bug the # auto-detect in `_write_collaboration_config` fixed for fresh setups). - cur_channel = cfg.get("channel") or _detect_install_channel() - - print(f" Current config ({config_path}):") - print(f" mode: {cur_mode}") - print(f" guided: {cur_guided}") - print(f" telemetry: {cur_telemetry}") - print(f" channel: {cur_channel}") + cur_channel = ( + operator_cfg.get("channel") or team_cfg.get("channel") or _detect_install_channel() + ) + # Preserve team_backend across the rewrite — it's set by the setup + # wizard's team-mode dispatch, not by run_config_wizard, but we must + # not wipe it on a re-run. + cur_team_backend: dict | None = team_cfg.get("team") + if cur_team_backend is not None: + # Merge the operator-routed `team.role` from operator.yaml so the + # downstream writer sees the full team_backend block. + op_team = operator_cfg.get("team", {}) or {} + if "role" in op_team and "role" not in cur_team_backend: + cur_team_backend = {**cur_team_backend, "role": op_team["role"]} + + print(" Current config:") + print(f" [team] {team_path}") + if operator_path is not None: + print(f" [your machine] {operator_path}") + else: + print(" [your machine] (legacy single-file layout)") + print(f" mode: {cur_mode} [team]") + print(f" guided: {cur_guided} [your machine]") + print(f" telemetry: {cur_telemetry} [your machine]") + print(f" channel: {cur_channel} [your machine]") print() new_mode = _select_collaboration_mode_with_default(cur_mode) new_guided = _select_guided_mode_with_default(cur_guided) new_telemetry = _select_telemetry_with_default(cur_telemetry) - # Write updated config - config_path.parent.mkdir(parents=True, exist_ok=True) - config_path.write_text( - "# Bicameral configuration\n" - f"mode: {new_mode}\n" - f"guided: {'true' if new_guided else 'false'}\n" - f"telemetry: {'true' if new_telemetry else 'false'}\n" - f"channel: {cur_channel}\n", - encoding="utf-8", + _write_collaboration_config( + data_path=repo_path, + mode=new_mode, + guided=new_guided, + telemetry=new_telemetry, + team_backend=cur_team_backend, + channel=cur_channel, + operator_path=operator_path, ) # Reinstall skills and hooks via subprocess (avoids stale sys.modules) @@ -1927,7 +2127,7 @@ def _select_collaboration_mode_with_default(current: str) -> str: questionary.Choice("Solo — decisions stored locally", value="solo"), ] result = questionary.select( - "Collaboration mode:", + "[team] Collaboration mode:", choices=choices, default=next((c for c in choices if c.value == current), choices[0]), ).ask() @@ -1944,7 +2144,7 @@ def _select_guided_mode_with_default(current: bool) -> bool: questionary.Choice("Normal — advisory hints only", value=False), ] result = questionary.select( - "Interaction intensity:", + "[your machine] Interaction intensity:", choices=choices, default=next((c for c in choices if c.value == current), choices[0]), ).ask() @@ -1961,7 +2161,7 @@ def _select_telemetry_with_default(current: bool) -> bool: questionary.Choice("No — keep telemetry off", value=False), ] result = questionary.select( - "Anonymous telemetry:", + "[your machine] Anonymous telemetry:", choices=choices, default=next((c for c in choices if c.value == current), choices[0]), ).ask() diff --git a/tests/test_config_split.py b/tests/test_config_split.py new file mode 100644 index 00000000..efce596b --- /dev/null +++ b/tests/test_config_split.py @@ -0,0 +1,165 @@ +"""R4 config split (#368) — `_write_collaboration_config` writes two files. + +Team-identity keys (`mode`, `team.backend`/`team.folder_id`/`team.remote_root`) +land in `/.bicameral/config.yaml` (committed). Per-operator keys +(`telemetry`, `channel`, `guided`, `signer_email_fallback`, +`render_source_attribution`, `team.role`) land in +`~/.bicameral/projects//operator.yaml` (private). + +Decision: decision:5nr66wvmapjpt58rrji8. +""" + +from __future__ import annotations + +import subprocess +import sys +from pathlib import Path +from unittest.mock import patch + +import pytest +import yaml + +REPO_ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO_ROOT)) + +import context # noqa: E402 +import setup_wizard # noqa: E402 + + +@pytest.fixture +def git_repo(tmp_path: Path) -> Path: + """Bare git init so the locator can resolve a project id.""" + subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True) + return tmp_path + + +def _read_yaml(path: Path) -> dict: + return yaml.safe_load(path.read_text(encoding="utf-8")) or {} + + +def test_team_identity_keys_persist_to_config_yaml(git_repo: Path, tmp_path: Path) -> None: + operator_path = tmp_path / "operator.yaml" + setup_wizard._write_collaboration_config( + data_path=git_repo, + mode="team", + guided=False, + telemetry=False, + team_backend={"backend": "google_drive", "folder_id": "abc123", "role": "founding_member"}, + channel="stable", + operator_path=operator_path, + ) + cfg = _read_yaml(git_repo / ".bicameral" / "config.yaml") + assert cfg["mode"] == "team" + assert cfg["team"]["backend"] == "google_drive" + assert cfg["team"]["folder_id"] == "abc123" + # Per-operator keys must NOT appear in the committed file. + assert "telemetry" not in cfg + assert "channel" not in cfg + assert "guided" not in cfg + assert "signer_email_fallback" not in cfg + assert "render_source_attribution" not in cfg + # team.role is per-operator → not in config.yaml's team block. + assert "role" not in cfg["team"] + + +def test_operator_keys_persist_to_operator_yaml(git_repo: Path, tmp_path: Path) -> None: + operator_path = tmp_path / "operator.yaml" + setup_wizard._write_collaboration_config( + data_path=git_repo, + mode="team", + guided=True, + telemetry=True, + team_backend={"backend": "google_drive", "folder_id": "abc123", "role": "founding_member"}, + channel="stable", + operator_path=operator_path, + ) + op = _read_yaml(operator_path) + assert op["guided"] is True + assert op["telemetry"] is True + assert op["channel"] == "stable" + assert op["signer_email_fallback"] == "local-part-only" + assert op["render_source_attribution"] == "redacted" + assert op["team"]["role"] == "founding_member" + # team-identity keys must NOT appear in operator.yaml. + assert "mode" not in op + assert "backend" not in op.get("team", {}) + assert "folder_id" not in op.get("team", {}) + + +def test_atomic_two_file_write_failure_unlinks_both_temps( + git_repo: Path, tmp_path: Path, monkeypatch +) -> None: + operator_path = tmp_path / "op-dir" / "operator.yaml" + config_path = git_repo / ".bicameral" / "config.yaml" + + real_replace = Path.replace + call_count = {"n": 0} + + def flaky_replace(self: Path, target): + call_count["n"] += 1 + if call_count["n"] == 2: + raise OSError("simulated rename failure on second file") + return real_replace(self, target) + + monkeypatch.setattr(Path, "replace", flaky_replace) + + with pytest.raises(OSError): + setup_wizard._write_collaboration_config( + data_path=git_repo, + mode="solo", + telemetry=False, + channel="stable", + operator_path=operator_path, + ) + + # Neither destination file should exist (operator was rolled back). + assert not operator_path.exists() + assert not config_path.exists() + + # No leftover .tmp artifacts in either parent. + leftovers = list(operator_path.parent.glob("*.tmp")) + list(config_path.parent.glob("*.tmp")) + assert leftovers == [], f"leftover temp files: {leftovers}" + + +def test_routing_table_covers_every_written_key() -> None: + """Every key produced by `_write_collaboration_config` must appear in the + routing table. Catches drift where a new key is added to the writer + but not the routing table. + """ + written_keys = { + "mode", + "guided", + "telemetry", + "channel", + "signer_email_fallback", + "render_source_attribution", + "team.backend", + "team.folder_id", + "team.remote_root", + "team.role", + } + missing = written_keys - context._CONFIG_KEY_ROUTING.keys() + assert missing == set(), f"keys missing from _CONFIG_KEY_ROUTING: {missing}" + + +def test_context_reads_route_per_key(git_repo: Path, tmp_path: Path, monkeypatch) -> None: + """A team key set in config.yaml and an operator key set in operator.yaml + are both reachable through the context.py readers — proving each + reader consults the file owned by its key. + """ + # Stub the locator's operator-path resolver so reads route to a fixture + # path instead of real ~/.bicameral/projects/. + operator_path = tmp_path / "operator.yaml" + monkeypatch.setattr( + "ledger_locator.resolve_operator_config_path", + lambda repo_path=None: operator_path, + ) + + (git_repo / ".bicameral").mkdir() + (git_repo / ".bicameral" / "config.yaml").write_text( + "mode: team\ningest_max_bytes: 524288\n", encoding="utf-8" + ) + operator_path.write_text("guided: true\n", encoding="utf-8") + + assert context._read_guided_mode(str(git_repo)) is True # from operator.yaml + assert context._read_ingest_max_bytes(str(git_repo)) == 524288 # from config.yaml diff --git a/tests/test_run_config_wizard.py b/tests/test_run_config_wizard.py new file mode 100644 index 00000000..c04583f5 --- /dev/null +++ b/tests/test_run_config_wizard.py @@ -0,0 +1,197 @@ +"""R4 two-pane editor for run_config_wizard (#368). + +The editor must: + - Read team-identity keys from /.bicameral/config.yaml + - Read per-operator keys from ~/.bicameral/projects//operator.yaml + - Tag each prompt with [team] or [your machine] so the operator can see + which file their answer lands in + - Write back via the same atomic two-file split as the setup wizard + +Decision: decision:5nr66wvmapjpt58rrji8. +""" + +from __future__ import annotations + +import subprocess +import sys +from pathlib import Path + +import pytest +import yaml + +REPO_ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO_ROOT)) + +import setup_wizard # noqa: E402 + + +@pytest.fixture(autouse=True) +def _clear_env(monkeypatch): + monkeypatch.delenv("BICAMERAL_AUTHORITATIVE_REF", raising=False) + monkeypatch.delenv("SURREAL_URL", raising=False) + monkeypatch.delenv("CODE_LOCATOR_SQLITE_DB", raising=False) + + +@pytest.fixture +def git_repo(tmp_path: Path, monkeypatch) -> Path: + subprocess.run(["git", "init", "-q", "-b", "main"], cwd=tmp_path, check=True) + subprocess.run(["git", "config", "user.email", "t@t.com"], cwd=tmp_path, check=True) + subprocess.run(["git", "config", "user.name", "T"], cwd=tmp_path, check=True) + monkeypatch.chdir(tmp_path) + return tmp_path + + +class _CapturingSelect: + """Stand-in for `questionary.select(...).ask()` that records the prompt + message + returns a scripted value from the supplied script dict.""" + + def __init__(self, script: dict[str, object], captured: list[dict]) -> None: + self._script = script + self._captured = captured + + def __call__(self, message, choices=None, default=None, **kwargs): + self._captured.append( + { + "message": message, + "choices": [c.value for c in (choices or [])], + "default": getattr(default, "value", default), + } + ) + # Lookup by message prefix — first matching key in the script wins. + for key, value in self._script.items(): + if key in message: + return _Ask(value) + # Default: return the prompt's `default` value (its .value attr). + return _Ask(getattr(default, "value", default)) + + +class _Ask: + def __init__(self, value): + self._value = value + + def ask(self): + return self._value + + +def _install_fake_questionary( + monkeypatch, script: dict[str, object], captured: list[dict] +) -> None: + """Stub `questionary` so the wizard's interactive prompts return + scripted values and record what they were asked.""" + import types + + fake = types.ModuleType("questionary") + fake.select = _CapturingSelect(script, captured) + + class _Choice: + def __init__(self, label, value=None): + self.label = label + self.value = value if value is not None else label + + fake.Choice = _Choice + monkeypatch.setitem(sys.modules, "questionary", fake) + monkeypatch.setattr(setup_wizard, "_is_interactive", lambda: True) + + +def _stub_post_write_install(monkeypatch) -> None: + """The wizard re-installs skills via subprocess after writing. Stub it + out so tests don't shell out and parse the subprocess's stdout.""" + monkeypatch.setattr( + subprocess, + "run", + lambda *a, **kw: subprocess.CompletedProcess(args=a, returncode=0, stdout="0", stderr=""), + ) + + +def test_editor_reads_from_both_files(git_repo: Path, tmp_path: Path, monkeypatch) -> None: + """When mode lives in config.yaml and guided/channel live in operator.yaml, + the editor surfaces both — each tagged with its routing prefix.""" + operator_path = tmp_path / "operator.yaml" + monkeypatch.setattr( + "ledger_locator.resolve_operator_config_path", + lambda repo_path=None: operator_path, + ) + (git_repo / ".bicameral").mkdir() + (git_repo / ".bicameral" / "config.yaml").write_text( + "mode: team\nteam:\n backend: google_drive\n folder_id: x\n", + encoding="utf-8", + ) + operator_path.write_text( + "guided: true\ntelemetry: false\nchannel: stable\n", encoding="utf-8" + ) + + captured: list[dict] = [] + # Empty script → every prompt returns the displayed default → no edits. + _install_fake_questionary(monkeypatch, {}, captured) + _stub_post_write_install(monkeypatch) + monkeypatch.setattr(setup_wizard, "_detect_repo", lambda hint=None: git_repo) + + rc = setup_wizard.run_config_wizard() + assert rc == 0 + + messages = [entry["message"] for entry in captured] + # The mode prompt is team-routed. + assert any("[team]" in m and "Collaboration mode" in m for m in messages), messages + # The guided prompt is operator-routed. + assert any("[your machine]" in m and "Interaction intensity" in m for m in messages), messages + # Defaults reflect what was loaded from each file. + by_msg = {entry["message"]: entry for entry in captured} + mode_entry = next(v for k, v in by_msg.items() if "Collaboration mode" in k) + guided_entry = next(v for k, v in by_msg.items() if "Interaction intensity" in k) + assert mode_entry["default"] == "team" + assert guided_entry["default"] is True + + +def test_editor_writes_to_routed_file(git_repo: Path, tmp_path: Path, monkeypatch) -> None: + """Simulate edits — mode team→solo (team-side), guided true→false + (operator-side). Each change must land in the right file with the + other keys untouched.""" + operator_path = tmp_path / "op-dir" / "operator.yaml" + monkeypatch.setattr( + "ledger_locator.resolve_operator_config_path", + lambda repo_path=None: operator_path, + ) + (git_repo / ".bicameral").mkdir() + (git_repo / ".bicameral" / "config.yaml").write_text( + "mode: team\nteam:\n backend: google_drive\n folder_id: x\n", + encoding="utf-8", + ) + operator_path.parent.mkdir(parents=True, exist_ok=True) + operator_path.write_text( + "guided: true\ntelemetry: false\nchannel: stable\n", encoding="utf-8" + ) + + captured: list[dict] = [] + _install_fake_questionary( + monkeypatch, + { + "Collaboration mode": "solo", + "Interaction intensity": False, + # telemetry stays false — keep it routed without changing + }, + captured, + ) + _stub_post_write_install(monkeypatch) + monkeypatch.setattr(setup_wizard, "_detect_repo", lambda hint=None: git_repo) + + rc = setup_wizard.run_config_wizard() + assert rc == 0 + + team_yaml = yaml.safe_load((git_repo / ".bicameral" / "config.yaml").read_text()) + op_yaml = yaml.safe_load(operator_path.read_text()) + + # Team file: mode flipped to solo; team-backend block untouched per + # _write_collaboration_config's behavior (only emits team: when + # team_backend is passed). For mode=solo the writer drops the team + # block — that's the correct semantic (solo has no team backend). + assert team_yaml["mode"] == "solo" + # Per-operator keys must NOT have leaked into the team file. + for forbidden in ("guided", "telemetry", "channel", "signer_email_fallback"): + assert forbidden not in team_yaml + + # Operator file: guided flipped to false; channel + telemetry unchanged. + assert op_yaml["guided"] is False + assert op_yaml["telemetry"] is False + assert op_yaml["channel"] == "stable" + # Team-identity keys must NOT have leaked into the operator file. + assert "mode" not in op_yaml diff --git a/tests/test_setup_wizard_git_native.py b/tests/test_setup_wizard_git_native.py new file mode 100644 index 00000000..02ff051e --- /dev/null +++ b/tests/test_setup_wizard_git_native.py @@ -0,0 +1,158 @@ +"""R4 git-native onboarding detection (#368) — `run_setup` reads HEAD's +.bicameral/config.yaml via `git show` instead of probing the filesystem. + +Decisions: + - decision:ew9rgegdlblexsraesss — git show HEAD:.bicameral/config.yaml + - decision:ogdfx014sqgc6fi6ky1a — reuse _resolve_authoritative_ref for + the divergence guard +""" + +from __future__ import annotations + +import subprocess +import sys +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO_ROOT)) + +import setup_wizard # noqa: E402 + + +def _git(args: list[str], cwd: Path) -> None: + subprocess.run(["git"] + args, cwd=cwd, check=True, capture_output=True, text=True) + + +def _commit_config(repo: Path, body: str, message: str = "init") -> None: + bdir = repo / ".bicameral" + bdir.mkdir(exist_ok=True) + (bdir / "config.yaml").write_text(body, encoding="utf-8") + _git(["add", ".bicameral/config.yaml"], repo) + _git(["commit", "-m", message], repo) + + +@pytest.fixture(autouse=True) +def _clear_env(monkeypatch): + """Test repos must resolve their own default branch; outer env shouldn't leak.""" + monkeypatch.delenv("BICAMERAL_AUTHORITATIVE_REF", raising=False) + monkeypatch.delenv("SURREAL_URL", raising=False) + monkeypatch.delenv("CODE_LOCATOR_SQLITE_DB", raising=False) + + +@pytest.fixture +def git_repo(tmp_path: Path) -> Path: + _git(["init", "-q", "-b", "main"], tmp_path) + _git(["config", "user.email", "test@example.com"], tmp_path) + _git(["config", "user.name", "Test User"], tmp_path) + return tmp_path + + +def test_read_committed_config_returns_parsed_yaml(git_repo: Path) -> None: + _commit_config( + git_repo, + "mode: team\nteam:\n backend: google_drive\n folder_id: abc123\n", + ) + parsed = setup_wizard._read_committed_config(git_repo, "HEAD") + assert parsed is not None + assert parsed["mode"] == "team" + assert parsed["team"]["backend"] == "google_drive" + assert parsed["team"]["folder_id"] == "abc123" + + +def test_read_committed_config_returns_none_when_unstaged(git_repo: Path) -> None: + # Repo with one unrelated commit so HEAD exists but config.yaml isn't tracked. + (git_repo / "README.md").write_text("hi\n", encoding="utf-8") + _git(["add", "README.md"], git_repo) + _git(["commit", "-m", "seed"], git_repo) + assert setup_wizard._read_committed_config(git_repo, "HEAD") is None + + +def test_read_committed_config_returns_none_for_malformed_yaml(git_repo: Path) -> None: + _commit_config(git_repo, "::: not yaml :::\n garbage [", message="bad") + assert setup_wizard._read_committed_config(git_repo, "HEAD") is None + + +def test_onboarding_skips_team_prompts_when_head_has_team_config( + git_repo: Path, monkeypatch +) -> None: + """run_setup auto-joins when HEAD's config.yaml declares team mode + + backend block — _select_collaboration_mode and _select_team_backend are + never invoked.""" + _commit_config( + git_repo, + "mode: team\nteam:\n backend: google_drive\n folder_id: abc123\n", + ) + + def _explode(*a, **kw): # would only fire if the prompt path is taken + raise AssertionError("interactive collaboration-mode prompt should be skipped") + + monkeypatch.setattr(setup_wizard, "_select_collaboration_mode", _explode) + monkeypatch.setattr(setup_wizard, "_select_team_backend", _explode) + # Other unrelated prompts → safe defaults + monkeypatch.setattr(setup_wizard, "_select_guided_mode", lambda: False) + monkeypatch.setattr(setup_wizard, "_select_telemetry", lambda: False) + monkeypatch.setattr(setup_wizard, "_select_agents", lambda: ["claude"]) + monkeypatch.setattr(setup_wizard, "_install_for_agent", lambda *a, **kw: None) + monkeypatch.setattr(setup_wizard, "_install_skills", lambda *a, **kw: 0) + monkeypatch.setattr(setup_wizard, "_install_claude_hooks", lambda *a, **kw: False) + monkeypatch.setattr( + setup_wizard, "_install_user_permissions_allowlist", lambda *a, **kw: False + ) + + rc = setup_wizard.run_setup(repo_hint=str(git_repo)) + assert rc == 0 + + +def test_onboarding_skips_solo_prompts_when_head_has_solo_config( + git_repo: Path, monkeypatch +) -> None: + """Symmetric: HEAD's config.yaml says `mode: solo` → skip the mode prompt.""" + _commit_config(git_repo, "mode: solo\n") + + def _explode(*a, **kw): + raise AssertionError("interactive collaboration-mode prompt should be skipped") + + monkeypatch.setattr(setup_wizard, "_select_collaboration_mode", _explode) + monkeypatch.setattr(setup_wizard, "_select_guided_mode", lambda: False) + monkeypatch.setattr(setup_wizard, "_select_telemetry", lambda: False) + monkeypatch.setattr(setup_wizard, "_select_agents", lambda: ["claude"]) + monkeypatch.setattr(setup_wizard, "_install_for_agent", lambda *a, **kw: None) + monkeypatch.setattr(setup_wizard, "_install_skills", lambda *a, **kw: 0) + monkeypatch.setattr(setup_wizard, "_install_claude_hooks", lambda *a, **kw: False) + monkeypatch.setattr( + setup_wizard, "_install_user_permissions_allowlist", lambda *a, **kw: False + ) + + rc = setup_wizard.run_setup(repo_hint=str(git_repo)) + assert rc == 0 + + +def test_divergence_guard_warns_on_branch_without_config( + git_repo: Path, monkeypatch, capsys +) -> None: + """Default branch has the config, feature branch doesn't — prompt + asks the operator before falling through to fresh setup, and the + default-branch name is named in the warning.""" + _commit_config(git_repo, "mode: team\nteam:\n backend: google_drive\n folder_id: x\n") + _git(["checkout", "-q", "-b", "feature-x"], git_repo) + (git_repo / ".bicameral" / "config.yaml").unlink() + _git(["commit", "-am", "drop config"], git_repo) + + seen = {"prompted": False} + + def fake_yes_no(message: str, default: bool = False) -> bool: + seen["prompted"] = True + seen["message"] = message + return False # decline → wizard aborts with merge-first hint + + monkeypatch.setattr(setup_wizard, "_prompt_yes_no", fake_yes_no) + monkeypatch.setattr(setup_wizard, "_select_agents", lambda: ["claude"]) + + rc = setup_wizard.run_setup(repo_hint=str(git_repo)) + assert rc == 1 + assert seen["prompted"] + out = capsys.readouterr().out + assert "main" in out # default branch named + assert ".bicameral/config.yaml" in out diff --git a/tests/test_setup_wizard_omits_state_env_vars.py b/tests/test_setup_wizard_omits_state_env_vars.py new file mode 100644 index 00000000..901dc652 --- /dev/null +++ b/tests/test_setup_wizard_omits_state_env_vars.py @@ -0,0 +1,33 @@ +"""setup_wizard._build_config must NOT write SURREAL_URL or CODE_LOCATOR_SQLITE_DB. + +#368 R4 — the ledger and code-graph paths come from `ledger_locator` at runtime +(resolved to `~/.bicameral/projects//`), not from `.mcp.json` env entries +the wizard writes. The wizard's job is to wire the MCP runner; the locator +owns the state-path question. +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO_ROOT)) + +import setup_wizard # noqa: E402 + + +def test_build_config_no_surreal_url(tmp_path): + config = setup_wizard._build_config(tmp_path, mode="solo") + assert "SURREAL_URL" not in config["env"] + + +def test_build_config_no_code_locator_sqlite_db(tmp_path): + config = setup_wizard._build_config(tmp_path, mode="solo") + assert "CODE_LOCATOR_SQLITE_DB" not in config["env"] + + +def test_build_config_team_mode_no_state_env_vars(tmp_path): + config = setup_wizard._build_config(tmp_path, mode="team") + assert "SURREAL_URL" not in config["env"] + assert "CODE_LOCATOR_SQLITE_DB" not in config["env"] From d78dd6c7b070eb345623fc7c1e31c5c297c8e330 Mon Sep 17 00:00:00 2001 From: jinhongkuan Date: Mon, 18 May 2026 19:53:36 -0700 Subject: [PATCH 7/8] feat(cli): migrate-state + gc CLIs (Phases 3+4, #368) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes out #368: the state move actually moves, and orphans get cleanable. `cli/migrate_state.py` (Phase 3) - Moves project-scoped state from `/.bicameral/` into the locator- resolved project dir at `~/.bicameral/projects//`: ledger.db, code-graph.db (+ shm/wal), bm25_index.pkl, watermark, pending-transcripts/, processed-transcripts/. - Also picks up the v0.15.x user-global `~/.bicameral/ledger.db` on the first project that runs migrate-state, then leaves subsequent projects alone. - R4: partitions a pre-split `/.bicameral/config.yaml` per the `context._CONFIG_KEY_ROUTING` table — team-identity keys stay in the committed file, per-operator keys move to operator.yaml under the project dir. Merges with any pre-existing operator.yaml (existing values win). Unknown keys stay in config.yaml with a warning. - Idempotent (`Nothing to migrate.` exit 0 on second run), archives on byte-different destination collisions to `~/.bicameral/archive//..bak` (default; CLI override via `--archive-dir`), de-dupes on byte-identical collisions, cleans empty source dirs after success. - `--dry-run` enumerates the plan without writing. `--auto` skips the pre-execute confirm prompt. - R4 deferred-ephemeral notice printed in every success summary (decision:e3xz4c4ji4x7lm3lvq4k). - Wired into server.py as `migrate-state` + `migrate-ledger` (alias per the issue verbiage). `cli/gc.py` (Phase 4) - Scans `~/.bicameral/projects//origin.txt` for each project dir, classifying as live / orphan / unreadable. - Default: list. `--delete` prompts per orphan / unreadable dir; `--yes` skips the prompt. Empty `origin.txt` and missing `origin.txt` both classify as `unreadable` so the operator can choose to reclaim. - Wired into server.py as the `gc` subparser. `skills/bicameral-update/SKILL.md` - New Step 3.5: after `bicameral.update(action="apply", ...)`, run `bicameral-mcp migrate-state --auto` before reporting "update complete." Surface stderr verbatim and abort the flow on non-zero exit, with a `--dry-run` offer as the fallback. Tests (17 new, 0 broken): - test_migrate_state.py — 12 tests: full-layout move, idempotent re-run, collision archives, dry-run, missing-source no-op, partial state, auto-flag skips prompts, default archive dir under home, bm25+watermark+transcript-queue explicit coverage, legacy user-global ledger first-project + already-claimed, R4 config.yaml partition - test_gc.py — 5 tests: list-only spares live, delete with per-item prompts, --yes skips prompts, empty origin.txt → unreadable, empty state root 145 tests across Phase 1–4 pass. ruff clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- PLAN.md | 23 ++ cli/gc.py | 125 ++++++++++ cli/migrate_state.py | 376 +++++++++++++++++++++++++++++++ server.py | 45 ++++ skills/bicameral-update/SKILL.md | 23 ++ tests/test_gc.py | 100 ++++++++ tests/test_migrate_state.py | 314 ++++++++++++++++++++++++++ 7 files changed, 1006 insertions(+) create mode 100644 cli/gc.py create mode 100644 cli/migrate_state.py create mode 100644 tests/test_gc.py create mode 100644 tests/test_migrate_state.py diff --git a/PLAN.md b/PLAN.md index e243a940..e40a63db 100644 --- a/PLAN.md +++ b/PLAN.md @@ -109,3 +109,26 @@ new mutating capabilities. |------|------------|-------|--------| | `mocks/code_locator.py` | `RealCodeLocatorAdapter` in `adapters/code_locator.py` | Phase 1 | **Deleted** | | `mocks/decision_ledger.py` | `SurrealDBLedgerAdapter` in `ledger/adapter.py` | Phase 2 | **Deleted** | + +--- + +## Ledger Locator (#368) + +Plan: [`thoughts/shared/plans/2026-05-16-ledger-locator-and-migration.md`](thoughts/shared/plans/2026-05-16-ledger-locator-and-migration.md). +R4 audit: PASS (locator + delegation + wizard split + git-native onboarding + migrate-state + gc). + +- [x] **Phase 1** — `ledger_locator/` module: `_project_id.py`, `_origin_guard.py`, + `__init__.py` with `resolve_*` paths (ledger, code_graph, bm25, + watermark, pending/processed transcripts, operator_config); explicit + VCS contract; origin-collision guard +- [x] **Phase 2A** — derived-state resolvers (bm25, watermark, transcripts) +- [x] **Phase 2B-i** — ledger + code-graph delegation in adapter / runtime / config +- [x] **Phase 2B-ii** — events/materializer + events/transcript_queue delegate to locator +- [x] **Phase 2C** — setup_wizard env-var strip + R4 config split + (`context._CONFIG_KEY_ROUTING`, `_write_collaboration_config` atomic + two-file write, git-native onboarding detection + divergence guard, + `run_config_wizard` two-pane editor) +- [x] **Phase 3** — `cli/migrate_state.py` + `migrate-state` / `migrate-ledger` + server subparsers + R4 `config.yaml` key partition + bicameral-update + skill post-upgrade hook +- [x] **Phase 4** — `cli/gc.py` orphan project-dir reclaim (list + `--delete`) diff --git a/cli/gc.py b/cli/gc.py new file mode 100644 index 00000000..08f02483 --- /dev/null +++ b/cli/gc.py @@ -0,0 +1,125 @@ +"""`bicameral-mcp gc` — reclaim project dirs whose origin no longer resolves. + +Each `~/.bicameral/projects//origin.txt` names the absolute git +common-dir path the locator hashed at first resolve. When the named +path no longer exists (project deleted, relocated, or rebased), the +project dir is orphaned. `gc` lists orphans by default and deletes +them under `--delete` after a per-item prompt (or `--yes` to skip). +""" + +from __future__ import annotations + +import argparse +import shutil +from pathlib import Path +from typing import Literal + +Status = Literal["live", "orphan", "unreadable"] + + +def _scan(state_root: Path) -> list[tuple[str, Path, Status, str | None]]: + """Yield (project_id, project_dir, status, origin_text) for every + immediate subdirectory of ``state_root``. + + - ``live`` — origin.txt points at an existing directory. + - ``orphan`` — origin.txt parses but the named path is gone. + - ``unreadable`` — origin.txt missing, empty, or not a regular file. + """ + out: list[tuple[str, Path, Status, str | None]] = [] + if not state_root.is_dir(): + return out + for child in sorted(state_root.iterdir()): + if not child.is_dir(): + continue + origin = child / "origin.txt" + if not origin.is_file(): + out.append((child.name, child, "unreadable", None)) + continue + try: + text = origin.read_text(encoding="utf-8").strip() + except OSError: + out.append((child.name, child, "unreadable", None)) + continue + if not text: + out.append((child.name, child, "unreadable", None)) + continue + if Path(text).is_dir(): + out.append((child.name, child, "live", text)) + else: + out.append((child.name, child, "orphan", text)) + return out + + +def _print_table(rows: list[tuple[str, Path, Status, str | None]]) -> None: + if not rows: + print(" (no projects under ~/.bicameral/projects/)") + return + print(f" {'STATUS':<10} {'PROJECT-ID':<18} ORIGIN") + for _project_id, _path, status, origin_text in rows: + # Pad the project id column with the first 16 chars of the dir name. + print(f" {status:<10} {_path.name:<18} {origin_text or '(unreadable)'}") + + +def _confirm_delete(project_dir: Path, status: Status, origin: str | None) -> bool: + """Per-item prompt. Empty / `y` / `yes` confirms; anything else declines.""" + try: + response = input( + f" Delete {project_dir.name} ({status} — origin={origin or 'unreadable'})? [y/N]: " + ).strip().lower() + except (EOFError, OSError): + return False + return response in ("y", "yes") + + +def _build_argparser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser( + prog="bicameral-mcp gc", + description="List or delete orphan project dirs under ~/.bicameral/projects/ (#368).", + ) + p.add_argument( + "--delete", + action="store_true", + help="prompt to delete each orphan (and unreadable) project dir", + ) + p.add_argument( + "--yes", + action="store_true", + help="under --delete: skip per-item prompts and delete every orphan/unreadable dir", + ) + p.add_argument( + "--state-root", + default=None, + metavar="PATH", + help="override ~/.bicameral/projects/ (test fixture knob)", + ) + return p + + +def main(argv: list[str] | None = None) -> int: + args = _build_argparser().parse_args(argv) + + if args.state_root is not None: + state_root = Path(args.state_root).expanduser().resolve() + else: + try: + from ledger_locator import STATE_ROOT + + state_root = STATE_ROOT + except ImportError as exc: # pragma: no cover + print(f" ERROR: ledger_locator unavailable: {exc}") + return 2 + + rows = _scan(state_root) + if not args.delete: + _print_table(rows) + return 0 + + to_delete = [r for r in rows if r[2] in ("orphan", "unreadable")] + if not to_delete: + print(" No orphan project dirs.") + return 0 + for _project_id, path, status, origin in to_delete: + if args.yes or _confirm_delete(path, status, origin): + shutil.rmtree(path, ignore_errors=False) + print(f" Removed {path}") + return 0 diff --git a/cli/migrate_state.py b/cli/migrate_state.py new file mode 100644 index 00000000..afcf6efa --- /dev/null +++ b/cli/migrate_state.py @@ -0,0 +1,376 @@ +"""`bicameral-mcp migrate-state` — relocate project-scoped state under #368. + +Moves every project-scoped state file from `/.bicameral/` (and the +v0.15.x user-global `~/.bicameral/ledger.db`) into the locator-resolved +project dir at `~/.bicameral/projects//`. Idempotent, archives on +collision, cleans up empty source directories after success. + +Also partitions a pre-R4 `/.bicameral/config.yaml` by routing each +key to either the (trimmed) team config or the new `operator.yaml` under +the project dir per `context._CONFIG_KEY_ROUTING`. Unknown keys stay in +config.yaml with a warning logged — forward-compat for future versions. +""" + +from __future__ import annotations + +import argparse +import shutil +from datetime import UTC, datetime +from pathlib import Path + +# Single files that move from /.bicameral//. +_FILE_SOURCES: tuple[str, ...] = ( + ".bicameral/ledger.db", + ".bicameral/local/code-graph.db", + ".bicameral/local/code-graph.db-shm", + ".bicameral/local/code-graph.db-wal", + ".bicameral/local/bm25_index.pkl", + ".bicameral/local/watermark", +) + +# Directories whose contents move file-by-file into //. +# After all files migrate, the source directory is removed if empty. +_DIR_SOURCES: tuple[str, ...] = ( + ".bicameral/pending-transcripts", + ".bicameral/processed-transcripts", +) + +_LEGACY_USER_GLOBAL_LEDGER = Path.home() / ".bicameral" / "ledger.db" + + +def _iso8601_stamp() -> str: + return datetime.now(UTC).strftime("%Y%m%dT%H%M%SZ") + + +def _files_byte_equal(a: Path, b: Path) -> bool: + """Compare two file contents byte-for-byte. Returns False when sizes + differ or either side is unreadable (treat as "collision unsafe").""" + try: + if a.stat().st_size != b.stat().st_size: + return False + return a.read_bytes() == b.read_bytes() + except OSError: + return False + + +def _archive(dest: Path, archive_dir: Path) -> Path: + """Move ``dest`` to ``/..bak`` and + return the archive path. Caller ensures ``dest`` exists.""" + archive_dir.mkdir(parents=True, exist_ok=True) + archive_path = archive_dir / f"{dest.name}.{_iso8601_stamp()}.bak" + shutil.move(str(dest), str(archive_path)) + return archive_path + + +def _plan_file_moves( + repo: Path, project_dir: Path +) -> list[tuple[Path, Path]]: + """Plan the (src, dest) pairs for single files (not dir contents).""" + plan: list[tuple[Path, Path]] = [] + for rel in _FILE_SOURCES: + src = repo / rel + if src.exists() and src.is_file(): + plan.append((src, project_dir / src.name)) + return plan + + +def _plan_dir_contents_moves( + repo: Path, project_dir: Path +) -> list[tuple[Path, Path]]: + """Plan (src_file, dest_file) pairs for every file inside the + pending/processed-transcripts source dirs.""" + plan: list[tuple[Path, Path]] = [] + for rel in _DIR_SOURCES: + src_dir = repo / rel + if not src_dir.is_dir(): + continue + dest_dir = project_dir / src_dir.name + for child in src_dir.iterdir(): + if child.is_file(): + plan.append((child, dest_dir / child.name)) + return plan + + +def _maybe_legacy_user_global_move(project_dir: Path) -> tuple[Path, Path] | None: + """If ``~/.bicameral/ledger.db`` exists and the locator-resolved + ledger doesn't yet, plan a one-shot relocation. Returns None when + nothing to migrate (file absent, or some other project already + claimed it via a fresh `~/.bicameral/projects//ledger.db`). + """ + src = _LEGACY_USER_GLOBAL_LEDGER + if not src.is_file(): + return None + dest = project_dir / "ledger.db" + if dest.exists(): + # First project to migrate already claimed it — leave the source + # alone (it may also have been already deleted in a prior run). + return None + return (src, dest) + + +def _partition_config_yaml( + repo: Path, project_dir: Path, *, dry_run: bool +) -> tuple[bool, list[str]]: + """R4 partition of a pre-split `.bicameral/config.yaml`. + + Splits each key into either the trimmed config.yaml (team-identity) + or operator.yaml (per-operator) per `context._CONFIG_KEY_ROUTING`. + Returns ``(did_something, warnings)``. No-ops if the file is missing + or already split (no operator-routed keys present). + """ + config_path = repo / ".bicameral" / "config.yaml" + if not config_path.is_file(): + return False, [] + + try: + import yaml + except ImportError: + return False, ["yaml unavailable — leaving config.yaml untouched"] + + try: + loaded = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + except Exception as exc: # noqa: BLE001 — fail-soft + return False, [f"config.yaml unparseable ({exc}) — leaving untouched"] + if not isinstance(loaded, dict): + return False, ["config.yaml top-level is not a mapping — leaving untouched"] + + from context import _CONFIG_KEY_ROUTING + + team_out: dict = {} + operator_out: dict = {} + warnings: list[str] = [] + + def _route(flat_key: str) -> str | None: + return _CONFIG_KEY_ROUTING.get(flat_key) + + # Top-level scalar keys + for key, value in loaded.items(): + if key == "team" and isinstance(value, dict): + continue # nested handling below + routing = _route(key) + if routing == "operator": + operator_out[key] = value + elif routing == "team": + team_out[key] = value + else: + warnings.append(f"unknown key in config.yaml — left in team file: {key!r}") + team_out[key] = value + + # `team.*` nested keys split per their own routing. + nested = loaded.get("team") + if isinstance(nested, dict): + team_block: dict = {} + op_team_block: dict = {} + for sub_key, sub_value in nested.items(): + routing = _route(f"team.{sub_key}") + if routing == "operator": + op_team_block[sub_key] = sub_value + elif routing == "team": + team_block[sub_key] = sub_value + else: + warnings.append( + f"unknown nested key in config.yaml — left in team file: team.{sub_key!r}" + ) + team_block[sub_key] = sub_value + if team_block: + team_out["team"] = team_block + if op_team_block: + operator_out["team"] = op_team_block + + has_operator_payload = bool(operator_out) + did_something = has_operator_payload # nothing to do if every key is already team-side + + if dry_run: + return did_something, warnings + + if has_operator_payload: + operator_path = project_dir / "operator.yaml" + operator_path.parent.mkdir(parents=True, exist_ok=True) + # Merge with any pre-existing operator.yaml so a partial prior + # migration doesn't clobber operator-only keys the wizard wrote. + if operator_path.exists(): + try: + existing = yaml.safe_load(operator_path.read_text(encoding="utf-8")) or {} + if isinstance(existing, dict): + # Existing values WIN — they're the operator's chosen state. + merged = {**operator_out, **existing} + operator_out = merged + except Exception as exc: # noqa: BLE001 + warnings.append(f"existing operator.yaml unreadable ({exc}); overwriting") + operator_path.write_text(yaml.safe_dump(operator_out, sort_keys=False), encoding="utf-8") + + # Trimmed team file: rewrite even when nothing moved out, so subsequent + # runs see a consistent shape. Skip the write entirely if the team body + # would be identical to the on-disk contents (idempotency). + new_team_text = yaml.safe_dump(team_out, sort_keys=False) if team_out else "" + cur_text = config_path.read_text(encoding="utf-8") + if new_team_text != cur_text and has_operator_payload: + config_path.write_text(new_team_text, encoding="utf-8") + + return did_something, warnings + + +def _execute_plan( + plan: list[tuple[Path, Path]], + archive_dir: Path, + *, + dry_run: bool, +) -> list[tuple[str, Path, Path, Path | None]]: + """Move each (src, dest) pair. Archive collisions when bytes differ. + Returns a list of (action, src, dest, archive_path) records for + summary printing.""" + log: list[tuple[str, Path, Path, Path | None]] = [] + for src, dest in plan: + if not src.exists(): + continue + if dest.exists(): + if _files_byte_equal(src, dest): + if not dry_run: + src.unlink() + log.append(("dedup", src, dest, None)) + continue + archive_path: Path | None = None + if not dry_run: + archive_path = _archive(dest, archive_dir) + log.append(("archive+move", src, dest, archive_path)) + else: + log.append(("move", src, dest, None)) + if not dry_run: + dest.parent.mkdir(parents=True, exist_ok=True) + shutil.move(str(src), str(dest)) + return log + + +def _cleanup_empty_source_dirs(repo: Path, *, dry_run: bool) -> list[Path] : + """Remove `/.bicameral/{local,pending-transcripts,processed-transcripts}` + if they're empty after the migration. Returns the list of removed dirs.""" + removed: list[Path] = [] + for rel in (".bicameral/local", *_DIR_SOURCES): + candidate = repo / rel + if candidate.is_dir() and not any(candidate.iterdir()): + removed.append(candidate) + if not dry_run: + candidate.rmdir() + return removed + + +def _print_summary( + log: list[tuple[str, Path, Path, Path | None]], + empty_dirs_removed: list[Path], + config_did_split: bool, + config_warnings: list[str], + *, + dry_run: bool, +) -> None: + if not log and not empty_dirs_removed and not config_did_split: + print(" Nothing to migrate.") + return + prefix = "[dry-run] " if dry_run else "" + for action, src, dest, archive_path in log: + if action == "dedup": + print(f" {prefix}skip (identical bytes) — removed {src} → kept {dest}") + elif action == "archive+move": + print(f" {prefix}archive {dest} → {archive_path}") + print(f" {prefix}move {src} → {dest}") + else: + print(f" {prefix}move {src} → {dest}") + for d in empty_dirs_removed: + print(f" {prefix}rmdir {d}") + if config_did_split: + print(f" {prefix}split config.yaml → team-identity + operator.yaml") + for w in config_warnings: + print(f" WARN: {w}") + # R4 deferred-ephemeral notice (decision:e3xz4c4ji4x7lm3lvq4k). + print( + " Note: ~/.bicameral/projects/ persists per home directory. If you run " + "bicameral inside an ephemeral container (Codespaces, devcontainer, CI), " + "state will be lost at teardown. Full ephemeral-environment support is " + "tracked separately under v0.16.1/v0.17." + ) + + +def _build_argparser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser( + prog="bicameral-mcp migrate-state", + description="Move project-scoped state into ~/.bicameral/projects// (#368).", + ) + p.add_argument( + "--repo", default=None, metavar="PATH", help="repo path (default: cwd)" + ) + p.add_argument( + "--auto", action="store_true", help="non-interactive — skip the confirm prompt" + ) + p.add_argument("--dry-run", action="store_true", help="plan only; write nothing") + p.add_argument( + "--archive-dir", + default=None, + metavar="PATH", + help="collisions land here (default: ~/.bicameral/archive//)", + ) + return p + + +def main(argv: list[str] | None = None) -> int: + parser = _build_argparser() + args = parser.parse_args(argv) + + repo = Path(args.repo).resolve() if args.repo else Path.cwd() + + try: + from ledger_locator import ( + ProjectIdResolutionError, + project_dir_for, + project_id_for, + ) + except ImportError as exc: # pragma: no cover + print(f" ERROR: ledger_locator unavailable: {exc}") + return 2 + + try: + project_id = project_id_for(repo) + project_dir = project_dir_for(repo) + except ProjectIdResolutionError as exc: + print(f" ERROR: {exc}") + return 2 + + archive_dir = ( + Path(args.archive_dir).expanduser().resolve() + if args.archive_dir + else Path.home() / ".bicameral" / "archive" / project_id + ) + + plan = _plan_file_moves(repo, project_dir) + plan += _plan_dir_contents_moves(repo, project_dir) + legacy = _maybe_legacy_user_global_move(project_dir) + if legacy is not None: + plan.append(legacy) + + if plan: + project_dir.mkdir(parents=True, exist_ok=True) + + if not args.auto and not args.dry_run and plan: + # Best-effort confirmation. Stdin not a tty → fall through. + try: + response = input( + f" Migrate {len(plan)} files into {project_dir}? [y/N]: " + ).strip().lower() + except (EOFError, OSError): + response = "y" + if response not in ("y", "yes"): + print(" Aborted (run with --auto to skip the prompt).") + return 1 + + log = _execute_plan(plan, archive_dir, dry_run=args.dry_run) + config_did_split, config_warnings = _partition_config_yaml( + repo, project_dir, dry_run=args.dry_run + ) + empty_dirs_removed = _cleanup_empty_source_dirs(repo, dry_run=args.dry_run) + + _print_summary( + log, + empty_dirs_removed, + config_did_split, + config_warnings, + dry_run=args.dry_run, + ) + return 0 diff --git a/server.py b/server.py index 00500eb0..69c47acc 100644 --- a/server.py +++ b/server.py @@ -1491,6 +1491,27 @@ def _register_subparsers(parser: ArgumentParser, subparsers: Any) -> None: metavar="PATH", help="read JSONL from file instead of stdin", ) + for name in ("migrate-state", "migrate-ledger"): + # `migrate-ledger` is an alias for `migrate-state` so #368's + # issue verbiage (and any docs that say "migrate-ledger") work. + msp = subparsers.add_parser( + name, + help=( + "move ledger + code-graph + bm25/watermark/transcripts " + "to ~/.bicameral/projects// (#368)" + ), + ) + msp.add_argument("--repo", default=None, metavar="PATH") + msp.add_argument("--auto", action="store_true") + msp.add_argument("--dry-run", action="store_true") + msp.add_argument("--archive-dir", default=None, metavar="PATH") + gc = subparsers.add_parser( + "gc", + help="list or delete orphan ~/.bicameral/projects// dirs (#368)", + ) + gc.add_argument("--delete", action="store_true") + gc.add_argument("--yes", action="store_true") + gc.add_argument("--state-root", default=None, metavar="PATH") parser.add_argument( "--smoke-test", action="store_true", help="validate wiring + list MCP tools, exit" ) @@ -1535,6 +1556,30 @@ def _dispatch(args: Any) -> int: from cli.ledger_import_cli import main as import_main return import_main(getattr(args, "from_file", None)) + if args.command in ("migrate-state", "migrate-ledger"): + from cli.migrate_state import main as migrate_main + + argv: list[str] = [] + if args.repo: + argv += ["--repo", args.repo] + if args.auto: + argv += ["--auto"] + if args.dry_run: + argv += ["--dry-run"] + if args.archive_dir: + argv += ["--archive-dir", args.archive_dir] + return migrate_main(argv) + if args.command == "gc": + from cli.gc import main as gc_main + + argv = [] + if args.delete: + argv += ["--delete"] + if args.yes: + argv += ["--yes"] + if args.state_root: + argv += ["--state-root", args.state_root] + return gc_main(argv) if args.smoke_test: result = asyncio.run(run_smoke_test()) print(f"{result['server_name']} {result['server_version']} smoke test passed") diff --git a/skills/bicameral-update/SKILL.md b/skills/bicameral-update/SKILL.md index f6ff5e2c..77055404 100644 --- a/skills/bicameral-update/SKILL.md +++ b/skills/bicameral-update/SKILL.md @@ -63,6 +63,29 @@ The server will: Then list `migration_replay_plan` entries and ask: "Re-ingest now? (yes/no)" +## Step 3.5 — Migrate state layout (v0.16 onwards, #368) + +After `bicameral.update(action="apply", ...)` returns successfully and BEFORE +reporting "update complete", run: + +``` +bicameral-mcp migrate-state --auto +``` + +via a Bash tool call. This relocates any pre-v0.16 ledger / code-graph / +bm25 / watermark / transcript-queue files out of `/.bicameral/` into +`~/.bicameral/projects//` and partitions pre-R4 `config.yaml` keys into +the team file and the operator file. The command is idempotent — on a +fresh-install repo it prints `Nothing to migrate.` and exits 0; on a +previously-installed repo it surfaces the planned moves before applying. + +If the binary exits non-zero: +1. Surface the stderr verbatim to the user. +2. Abort Step 4 — do NOT report "update complete." +3. Tell the user the new binary is installed but state is in a mixed + layout; offer to run `bicameral-mcp migrate-state --dry-run --auto` so + they can see what the migration would do. + ## Step 4 — Confirm success Report: `bicameral-mcp updated to v{recommended_version}. {skills_updated} skill(s) reinstalled.` diff --git a/tests/test_gc.py b/tests/test_gc.py new file mode 100644 index 00000000..1f29f6b2 --- /dev/null +++ b/tests/test_gc.py @@ -0,0 +1,100 @@ +"""Tests for `bicameral-mcp gc` (#368 Phase 4). + +Lists / deletes orphan project dirs under ~/.bicameral/projects/. +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO_ROOT)) + +from cli import gc # noqa: E402 + + +def _seed(state_root: Path, project_id: str, origin_target: Path | None | str) -> Path: + """Create `//origin.txt` pointing at the + given target (Path → real path, str → literal string for unreadable + cases, None → omit origin.txt entirely).""" + project_dir = state_root / project_id + project_dir.mkdir(parents=True, exist_ok=True) + origin = project_dir / "origin.txt" + if origin_target is None: + return project_dir + text = str(origin_target) if isinstance(origin_target, Path) else origin_target + origin.write_text(text, encoding="utf-8") + return project_dir + + +def test_lists_orphans_and_keeps_live_projects(tmp_path: Path, capsys) -> None: + state_root = tmp_path / "projects" + live_target = tmp_path / "live-repo" / ".git" + live_target.mkdir(parents=True) + _seed(state_root, "live-id-1234567890", live_target) + _seed(state_root, "orphan-id-aaaa1234", tmp_path / "vanished" / ".git") + + rc = gc.main(["--state-root", str(state_root)]) + assert rc == 0 + out = capsys.readouterr().out + assert "orphan-id-aaaa1234" in out + assert "orphan" in out + assert "live-id-1234567890" in out + assert "live" in out + + +def test_delete_prompts_per_item_and_removes_confirmed_ones( + tmp_path: Path, monkeypatch +) -> None: + state_root = tmp_path / "projects" + a = _seed(state_root, "orphan-aaaa1234567890", tmp_path / "gone-a" / ".git") + b = _seed(state_root, "orphan-bbbb1234567890", tmp_path / "gone-b" / ".git") + + responses = iter(["y", "n"]) + monkeypatch.setattr("builtins.input", lambda *a, **kw: next(responses)) + + rc = gc.main(["--delete", "--state-root", str(state_root)]) + assert rc == 0 + assert not a.exists() + assert b.exists() + + +def test_delete_with_yes_flag_skips_prompts(tmp_path: Path, monkeypatch) -> None: + state_root = tmp_path / "projects" + a = _seed(state_root, "orphan-aaaa1234567890", tmp_path / "gone-a" / ".git") + b = _seed(state_root, "orphan-bbbb1234567890", tmp_path / "gone-b" / ".git") + + def _explode(*a, **kw): + raise AssertionError("prompt should be skipped under --yes") + + monkeypatch.setattr("builtins.input", _explode) + rc = gc.main(["--delete", "--yes", "--state-root", str(state_root)]) + assert rc == 0 + assert not a.exists() + assert not b.exists() + + +def test_skips_unreadable_origin_txt_with_warn(tmp_path: Path, capsys) -> None: + state_root = tmp_path / "projects" + # origin.txt missing entirely + _seed(state_root, "no-origin-abcdef12", None) + # origin.txt is empty + pid = _seed(state_root, "empty-origin-12345", "") + + rc = gc.main(["--state-root", str(state_root)]) + assert rc == 0 + out = capsys.readouterr().out + assert "unreadable" in out + # Default (list-only) does NOT delete. + assert pid.exists() + + +def test_empty_state_root_lists_cleanly(tmp_path: Path, capsys) -> None: + state_root = tmp_path / "projects-empty" + rc = gc.main(["--state-root", str(state_root)]) + assert rc == 0 + out = capsys.readouterr().out + assert "no projects" in out.lower() diff --git a/tests/test_migrate_state.py b/tests/test_migrate_state.py new file mode 100644 index 00000000..7bc55776 --- /dev/null +++ b/tests/test_migrate_state.py @@ -0,0 +1,314 @@ +"""Tests for `bicameral-mcp migrate-state` (#368 Phase 3). + +Moves project-scoped state from `/.bicameral/` into the +locator-resolved project dir at `~/.bicameral/projects//`. +""" + +from __future__ import annotations + +import os +import subprocess +import sys +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO_ROOT)) + +from cli import migrate_state # noqa: E402 + + +@pytest.fixture(autouse=True) +def _clear_env(monkeypatch): + monkeypatch.delenv("SURREAL_URL", raising=False) + monkeypatch.delenv("CODE_LOCATOR_SQLITE_DB", raising=False) + monkeypatch.delenv("BICAMERAL_LOCATOR_ALLOW_COLLISION", raising=False) + + +@pytest.fixture +def git_repo(tmp_path: Path) -> Path: + subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True) + return tmp_path + + +def _seed_full_legacy_layout(repo: Path) -> dict[str, bytes]: + """Populate every source path with deterministic random-ish bytes. + Returns a dict of relpath → contents for downstream byte-for-byte + assertions.""" + payloads: dict[str, bytes] = {} + (repo / ".bicameral").mkdir(exist_ok=True) + (repo / ".bicameral" / "local").mkdir(exist_ok=True) + (repo / ".bicameral" / "pending-transcripts").mkdir(exist_ok=True) + (repo / ".bicameral" / "processed-transcripts").mkdir(exist_ok=True) + + files = { + ".bicameral/ledger.db": b"ledger-bytes-" + os.urandom(64), + ".bicameral/local/code-graph.db": b"cg-bytes-" + os.urandom(64), + ".bicameral/local/code-graph.db-shm": b"shm-" + os.urandom(32), + ".bicameral/local/code-graph.db-wal": b"wal-" + os.urandom(32), + ".bicameral/local/bm25_index.pkl": b"bm25-" + os.urandom(64), + ".bicameral/local/watermark": b'{"peer@example.com": 17}', + ".bicameral/pending-transcripts/sess1.jsonl": b'{"sid":"s1"}\n', + ".bicameral/pending-transcripts/sess2.jsonl": b'{"sid":"s2"}\n', + ".bicameral/processed-transcripts/sess0.jsonl": b'{"sid":"s0"}\n', + } + for rel, body in files.items(): + (repo / rel).write_bytes(body) + payloads[rel] = body + return payloads + + +def _project_dir(repo: Path) -> Path: + import ledger_locator + + return ledger_locator.project_dir_for(repo) + + +def test_moves_ledger_and_code_graph_in_one_pass(git_repo: Path) -> None: + seeded = _seed_full_legacy_layout(git_repo) + pdir = _project_dir(git_repo) + + rc = migrate_state.main(["--repo", str(git_repo), "--auto"]) + assert rc == 0 + + expected_pairs = { + ".bicameral/ledger.db": "ledger.db", + ".bicameral/local/code-graph.db": "code-graph.db", + ".bicameral/local/code-graph.db-shm": "code-graph.db-shm", + ".bicameral/local/code-graph.db-wal": "code-graph.db-wal", + ".bicameral/local/bm25_index.pkl": "bm25_index.pkl", + ".bicameral/local/watermark": "watermark", + ".bicameral/pending-transcripts/sess1.jsonl": "pending-transcripts/sess1.jsonl", + ".bicameral/pending-transcripts/sess2.jsonl": "pending-transcripts/sess2.jsonl", + ".bicameral/processed-transcripts/sess0.jsonl": "processed-transcripts/sess0.jsonl", + } + for src_rel, dest_rel in expected_pairs.items(): + assert not (git_repo / src_rel).exists(), f"source survived: {src_rel}" + dest = pdir / dest_rel + assert dest.exists(), f"dest missing: {dest_rel}" + assert dest.read_bytes() == seeded[src_rel] + + # Empty source dirs should be cleaned up. + assert not (git_repo / ".bicameral" / "local").exists() + assert not (git_repo / ".bicameral" / "pending-transcripts").exists() + assert not (git_repo / ".bicameral" / "processed-transcripts").exists() + + +def test_idempotent_second_run_is_noop(git_repo: Path, capsys) -> None: + _seed_full_legacy_layout(git_repo) + pdir = _project_dir(git_repo) + migrate_state.main(["--repo", str(git_repo), "--auto"]) + capsys.readouterr() # discard first-run output + mtimes_before = { + p: p.stat().st_mtime for p in pdir.rglob("*") if p.is_file() + } + rc = migrate_state.main(["--repo", str(git_repo), "--auto"]) + assert rc == 0 + out = capsys.readouterr().out + assert "Nothing to migrate." in out + mtimes_after = { + p: p.stat().st_mtime for p in pdir.rglob("*") if p.is_file() + } + assert mtimes_after == mtimes_before, "second run touched destination files" + + +def test_collision_archives_destination(git_repo: Path, tmp_path: Path) -> None: + _seed_full_legacy_layout(git_repo) + pdir = _project_dir(git_repo) + pdir.mkdir(parents=True, exist_ok=True) + # Pre-populate the destination with DIFFERENT content so the migrator + # has to archive the existing file before moving the source in. + existing = b"i-am-an-older-ledger" + (pdir / "ledger.db").write_bytes(existing) + + archive_dir = tmp_path / "archive" + rc = migrate_state.main( + [ + "--repo", + str(git_repo), + "--auto", + "--archive-dir", + str(archive_dir), + ] + ) + assert rc == 0 + archives = list(archive_dir.glob("ledger.db.*.bak")) + assert len(archives) == 1 + assert archives[0].read_bytes() == existing + # Source no longer exists, dest now holds the source's bytes. + assert not (git_repo / ".bicameral" / "ledger.db").exists() + assert (pdir / "ledger.db").read_bytes() != existing + + +def test_dry_run_writes_nothing(git_repo: Path, capsys) -> None: + seeded = _seed_full_legacy_layout(git_repo) + pdir = _project_dir(git_repo) + # Make sure the destination dir doesn't exist before the dry-run so + # we can assert it stays absent. + if pdir.exists(): + import shutil + + shutil.rmtree(pdir, ignore_errors=True) + rc = migrate_state.main(["--repo", str(git_repo), "--dry-run", "--auto"]) + assert rc == 0 + # Sources unchanged + for rel, body in seeded.items(): + assert (git_repo / rel).read_bytes() == body + # No destinations created + assert not pdir.exists() or not any(pdir.iterdir()) + # Output enumerates planned moves + out = capsys.readouterr().out + assert "[dry-run]" in out + + +def test_missing_source_skips_silently(git_repo: Path, capsys) -> None: + rc = migrate_state.main(["--repo", str(git_repo), "--auto"]) + assert rc == 0 + out = capsys.readouterr().out + assert "Nothing to migrate." in out + + +def test_partial_state_migrates_what_exists(git_repo: Path) -> None: + (git_repo / ".bicameral").mkdir() + (git_repo / ".bicameral" / "ledger.db").write_bytes(b"only-ledger-here") + pdir = _project_dir(git_repo) + + rc = migrate_state.main(["--repo", str(git_repo), "--auto"]) + assert rc == 0 + assert (pdir / "ledger.db").read_bytes() == b"only-ledger-here" + assert not (pdir / "code-graph.db").exists() + + +def test_auto_flag_skips_prompts(git_repo: Path, monkeypatch) -> None: + _seed_full_legacy_layout(git_repo) + + def _explode(*a, **kw): + raise AssertionError("interactive prompt should be skipped under --auto") + + monkeypatch.setattr("builtins.input", _explode) + rc = migrate_state.main(["--repo", str(git_repo), "--auto"]) + assert rc == 0 + + +def test_archive_dir_defaults_under_home(git_repo: Path, tmp_path: Path, monkeypatch) -> None: + monkeypatch.setenv("HOME", str(tmp_path)) + # Re-import ledger_locator under the new HOME so STATE_ROOT picks up + # the redirected path. + import importlib + + import ledger_locator + + importlib.reload(ledger_locator) + + _seed_full_legacy_layout(git_repo) + pdir = ledger_locator.project_dir_for(git_repo) + pdir.mkdir(parents=True, exist_ok=True) + (pdir / "ledger.db").write_bytes(b"older-content") + + rc = migrate_state.main(["--repo", str(git_repo), "--auto"]) + assert rc == 0 + + expected_archive_root = tmp_path / ".bicameral" / "archive" + archives = list(expected_archive_root.rglob("ledger.db.*.bak")) + assert len(archives) == 1 + + +def test_moves_bm25_watermark_and_transcript_queues(git_repo: Path) -> None: + """Explicit coverage of the four R3-added sources (the locator added + these resolvers but pre-#368 builds wrote them under `/.bicameral/` + — the migration must move them with byte-for-byte fidelity).""" + seeded = _seed_full_legacy_layout(git_repo) + pdir = _project_dir(git_repo) + + rc = migrate_state.main(["--repo", str(git_repo), "--auto"]) + assert rc == 0 + assert (pdir / "bm25_index.pkl").read_bytes() == seeded[".bicameral/local/bm25_index.pkl"] + assert (pdir / "watermark").read_bytes() == seeded[".bicameral/local/watermark"] + assert (pdir / "pending-transcripts" / "sess1.jsonl").exists() + assert (pdir / "pending-transcripts" / "sess2.jsonl").exists() + assert (pdir / "processed-transcripts" / "sess0.jsonl").exists() + + +def test_legacy_user_global_ledger_moves_on_first_project( + git_repo: Path, tmp_path: Path, monkeypatch +) -> None: + """The v0.15.x layout put `~/.bicameral/ledger.db` outside any project + namespace. The first project's migrate-state claims it.""" + monkeypatch.setenv("HOME", str(tmp_path)) + import importlib + + import cli.migrate_state as ms + import ledger_locator + + importlib.reload(ledger_locator) + importlib.reload(ms) + + legacy = tmp_path / ".bicameral" / "ledger.db" + legacy.parent.mkdir(parents=True, exist_ok=True) + legacy.write_bytes(b"user-global-ledger") + + rc = ms.main(["--repo", str(git_repo), "--auto"]) + assert rc == 0 + pdir = ledger_locator.project_dir_for(git_repo) + assert (pdir / "ledger.db").read_bytes() == b"user-global-ledger" + assert not legacy.exists() + + +def test_legacy_user_global_ledger_already_claimed_is_noop( + git_repo: Path, tmp_path: Path, monkeypatch +) -> None: + """If the user-global ledger no longer exists (already moved on a + prior project) and the current project doesn't have its own legacy + state, migrate runs as a no-op.""" + monkeypatch.setenv("HOME", str(tmp_path)) + import importlib + + import cli.migrate_state as ms + import ledger_locator + + importlib.reload(ledger_locator) + importlib.reload(ms) + + # ~/.bicameral/ledger.db does NOT exist; just make sure the rest of + # the layout is empty too. + rc = ms.main(["--repo", str(git_repo), "--auto"]) + assert rc == 0 + + +def test_partitions_config_yaml_keys(git_repo: Path) -> None: + """R4 addition: when `/.bicameral/config.yaml` predates the split, + migrate-state partitions the per-operator keys into operator.yaml and + keeps the team-identity keys in config.yaml.""" + (git_repo / ".bicameral").mkdir(exist_ok=True) + (git_repo / ".bicameral" / "config.yaml").write_text( + "mode: team\n" + "guided: true\n" + "telemetry: false\n" + "channel: stable\n" + "team:\n" + " backend: google_drive\n" + " folder_id: x\n" + " role: member\n", + encoding="utf-8", + ) + pdir = _project_dir(git_repo) + + rc = migrate_state.main(["--repo", str(git_repo), "--auto"]) + assert rc == 0 + + import yaml + + team = yaml.safe_load((git_repo / ".bicameral" / "config.yaml").read_text()) + op = yaml.safe_load((pdir / "operator.yaml").read_text()) + + assert team["mode"] == "team" + assert team["team"] == {"backend": "google_drive", "folder_id": "x"} + # Per-operator keys must leave the committed file. + for k in ("guided", "telemetry", "channel"): + assert k not in team + + assert op["guided"] is True + assert op["telemetry"] is False + assert op["channel"] == "stable" + assert op["team"] == {"role": "member"} From 362448296a92dfce2168011299f7dbc4c3c70883 Mon Sep 17 00:00:00 2001 From: jinhongkuan Date: Mon, 18 May 2026 20:15:37 -0700 Subject: [PATCH 8/8] style: ruff format the #368 diff to satisfy CI format gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI `ruff + mypy` failed `ruff format --check` on 10 files in the diff. Running `ruff format` and re-staging — pure whitespace / line-wrap reflow, no semantics changed; 25/25 tests across gc + migrate_state + run_config_wizard + setup_wizard_git_native still pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/gc.py | 10 ++++++--- cli/migrate_state.py | 24 +++++++-------------- setup_wizard.py | 13 +++--------- tests/test_gc.py | 4 +--- tests/test_ledger_locator.py | 26 ++++++----------------- tests/test_ledger_locator_origin_guard.py | 4 +++- tests/test_migrate_state.py | 8 ++----- tests/test_run_config_wizard.py | 12 +++-------- tests/test_session_end_queue_writer.py | 1 + tests/test_setup_wizard_git_native.py | 8 ++----- 10 files changed, 37 insertions(+), 73 deletions(-) diff --git a/cli/gc.py b/cli/gc.py index 08f02483..998a8684 100644 --- a/cli/gc.py +++ b/cli/gc.py @@ -63,9 +63,13 @@ def _print_table(rows: list[tuple[str, Path, Status, str | None]]) -> None: def _confirm_delete(project_dir: Path, status: Status, origin: str | None) -> bool: """Per-item prompt. Empty / `y` / `yes` confirms; anything else declines.""" try: - response = input( - f" Delete {project_dir.name} ({status} — origin={origin or 'unreadable'})? [y/N]: " - ).strip().lower() + response = ( + input( + f" Delete {project_dir.name} ({status} — origin={origin or 'unreadable'})? [y/N]: " + ) + .strip() + .lower() + ) except (EOFError, OSError): return False return response in ("y", "yes") diff --git a/cli/migrate_state.py b/cli/migrate_state.py index afcf6efa..b57002c1 100644 --- a/cli/migrate_state.py +++ b/cli/migrate_state.py @@ -62,9 +62,7 @@ def _archive(dest: Path, archive_dir: Path) -> Path: return archive_path -def _plan_file_moves( - repo: Path, project_dir: Path -) -> list[tuple[Path, Path]]: +def _plan_file_moves(repo: Path, project_dir: Path) -> list[tuple[Path, Path]]: """Plan the (src, dest) pairs for single files (not dir contents).""" plan: list[tuple[Path, Path]] = [] for rel in _FILE_SOURCES: @@ -74,9 +72,7 @@ def _plan_file_moves( return plan -def _plan_dir_contents_moves( - repo: Path, project_dir: Path -) -> list[tuple[Path, Path]]: +def _plan_dir_contents_moves(repo: Path, project_dir: Path) -> list[tuple[Path, Path]]: """Plan (src_file, dest_file) pairs for every file inside the pending/processed-transcripts source dirs.""" plan: list[tuple[Path, Path]] = [] @@ -241,7 +237,7 @@ def _execute_plan( return log -def _cleanup_empty_source_dirs(repo: Path, *, dry_run: bool) -> list[Path] : +def _cleanup_empty_source_dirs(repo: Path, *, dry_run: bool) -> list[Path]: """Remove `/.bicameral/{local,pending-transcripts,processed-transcripts}` if they're empty after the migration. Returns the list of removed dirs.""" removed: list[Path] = [] @@ -294,12 +290,8 @@ def _build_argparser() -> argparse.ArgumentParser: prog="bicameral-mcp migrate-state", description="Move project-scoped state into ~/.bicameral/projects// (#368).", ) - p.add_argument( - "--repo", default=None, metavar="PATH", help="repo path (default: cwd)" - ) - p.add_argument( - "--auto", action="store_true", help="non-interactive — skip the confirm prompt" - ) + p.add_argument("--repo", default=None, metavar="PATH", help="repo path (default: cwd)") + p.add_argument("--auto", action="store_true", help="non-interactive — skip the confirm prompt") p.add_argument("--dry-run", action="store_true", help="plan only; write nothing") p.add_argument( "--archive-dir", @@ -351,9 +343,9 @@ def main(argv: list[str] | None = None) -> int: if not args.auto and not args.dry_run and plan: # Best-effort confirmation. Stdin not a tty → fall through. try: - response = input( - f" Migrate {len(plan)} files into {project_dir}? [y/N]: " - ).strip().lower() + response = ( + input(f" Migrate {len(plan)} files into {project_dir}? [y/N]: ").strip().lower() + ) except (EOFError, OSError): response = "y" if response not in ("y", "yes"): diff --git a/setup_wizard.py b/setup_wizard.py index aba81d07..8ee9dcce 100644 --- a/setup_wizard.py +++ b/setup_wizard.py @@ -1656,9 +1656,7 @@ def _write_collaboration_config( else: resolved_operator_path.parent.mkdir(parents=True, exist_ok=True) config_tmp = config_path.with_suffix(config_path.suffix + ".tmp") - operator_tmp = resolved_operator_path.with_suffix( - resolved_operator_path.suffix + ".tmp" - ) + operator_tmp = resolved_operator_path.with_suffix(resolved_operator_path.suffix + ".tmp") config_tmp.write_text(team_body, encoding="utf-8") operator_tmp.write_text(operator_body, encoding="utf-8") operator_promoted = False @@ -1851,10 +1849,7 @@ def run_setup( team_block = committed["team"] backend = team_block.get("backend") target = team_block.get("folder_id") or team_block.get("remote_root") - print( - f" Detected team config: backend={backend}, " - f"folder={target} ✓ (auto-joining)" - ) + print(f" Detected team config: backend={backend}, folder={target} ✓ (auto-joining)") team_backend = {k: v for k, v in team_block.items() if v is not None} else: print(f" Detected committed config: mode={collab_mode} ✓ (auto-inheriting)") @@ -1867,9 +1862,7 @@ def run_setup( default_committed = _read_committed_config(repo_path, default_branch) if default_committed is not None and default_committed.get("mode") in ("team", "solo"): print() - print( - f" Your branch doesn't have .bicameral/config.yaml, but {default_branch} does." - ) + print(f" Your branch doesn't have .bicameral/config.yaml, but {default_branch} does.") if not _prompt_yes_no( " Continue with fresh setup? (Choosing No exits so you can merge first.)", default=False, diff --git a/tests/test_gc.py b/tests/test_gc.py index 1f29f6b2..94dfa96b 100644 --- a/tests/test_gc.py +++ b/tests/test_gc.py @@ -46,9 +46,7 @@ def test_lists_orphans_and_keeps_live_projects(tmp_path: Path, capsys) -> None: assert "live" in out -def test_delete_prompts_per_item_and_removes_confirmed_ones( - tmp_path: Path, monkeypatch -) -> None: +def test_delete_prompts_per_item_and_removes_confirmed_ones(tmp_path: Path, monkeypatch) -> None: state_root = tmp_path / "projects" a = _seed(state_root, "orphan-aaaa1234567890", tmp_path / "gone-a" / ".git") b = _seed(state_root, "orphan-bbbb1234567890", tmp_path / "gone-b" / ".git") diff --git a/tests/test_ledger_locator.py b/tests/test_ledger_locator.py index 709caa8a..bf0800b4 100644 --- a/tests/test_ledger_locator.py +++ b/tests/test_ledger_locator.py @@ -47,7 +47,7 @@ def test_default_resolves_under_home_bicameral_projects(git_repo: Path) -> None: assert code_graph.name == "code-graph.db" # Same project — ledger and code-graph live side by side. - assert code_graph.parent == Path(url[len("surrealkv://"):]).parent + assert code_graph.parent == Path(url[len("surrealkv://") :]).parent def test_env_override_wins_for_ledger_only(git_repo: Path, monkeypatch) -> None: @@ -150,9 +150,7 @@ def test_resolve_operator_config_path_stable_across_worktrees( """R4: same project + different worktrees → same operator.yaml path.""" import ledger_locator - subprocess.run( - ["git", "commit", "--allow-empty", "-q", "-m", "init"], cwd=git_repo, check=True - ) + subprocess.run(["git", "commit", "--allow-empty", "-q", "-m", "init"], cwd=git_repo, check=True) worktree = tmp_path / "wt2" subprocess.run( ["git", "worktree", "add", "-q", "--detach", str(worktree)], @@ -184,17 +182,13 @@ def test_resolves_derived_state_paths_under_project_dir(git_repo: Path) -> None: assert processed == project_dir / "processed-transcripts" -def test_derived_state_paths_stable_across_worktrees( - git_repo: Path, tmp_path: Path -) -> None: +def test_derived_state_paths_stable_across_worktrees(git_repo: Path, tmp_path: Path) -> None: """R3: derived-state paths must be identical across worktrees of one project (this is the whole point of project-scoping them). """ import ledger_locator - subprocess.run( - ["git", "commit", "--allow-empty", "-q", "-m", "init"], cwd=git_repo, check=True - ) + subprocess.run(["git", "commit", "--allow-empty", "-q", "-m", "init"], cwd=git_repo, check=True) worktree = tmp_path / "wt2" subprocess.run( ["git", "worktree", "add", "-q", "--detach", str(worktree)], @@ -212,9 +206,7 @@ def test_derived_state_paths_stable_across_worktrees( assert fn(repo_path=git_repo) == fn(repo_path=worktree), resolver -def test_derived_state_paths_have_no_env_override( - git_repo: Path, monkeypatch -) -> None: +def test_derived_state_paths_have_no_env_override(git_repo: Path, monkeypatch) -> None: """R3: derived-state paths are NOT user-overridable per call (unlike ledger.db via SURREAL_URL and code-graph.db via CODE_LOCATOR_SQLITE_DB). Setting unrelated overrides must not leak into these paths. @@ -229,11 +221,7 @@ def test_derived_state_paths_have_no_env_override( project_dir = ledger_locator.project_dir_for(repo_path=git_repo) assert ledger_locator.resolve_bm25_index_path(repo_path=git_repo).parent == project_dir assert ledger_locator.resolve_watermark_path(repo_path=git_repo).parent == project_dir + assert ledger_locator.resolve_pending_transcripts_dir(repo_path=git_repo).parent == project_dir assert ( - ledger_locator.resolve_pending_transcripts_dir(repo_path=git_repo).parent - == project_dir - ) - assert ( - ledger_locator.resolve_processed_transcripts_dir(repo_path=git_repo).parent - == project_dir + ledger_locator.resolve_processed_transcripts_dir(repo_path=git_repo).parent == project_dir ) diff --git a/tests/test_ledger_locator_origin_guard.py b/tests/test_ledger_locator_origin_guard.py index 76511997..583e910a 100644 --- a/tests/test_ledger_locator_origin_guard.py +++ b/tests/test_ledger_locator_origin_guard.py @@ -47,7 +47,9 @@ def test_first_resolve_writes_origin_txt(git_repo: Path) -> None: text=True, check=True, ).stdout.strip() - expected = str(Path(common_dir if Path(common_dir).is_absolute() else git_repo / common_dir).resolve()) + expected = str( + Path(common_dir if Path(common_dir).is_absolute() else git_repo / common_dir).resolve() + ) assert content == expected diff --git a/tests/test_migrate_state.py b/tests/test_migrate_state.py index 7bc55776..2feee73d 100644 --- a/tests/test_migrate_state.py +++ b/tests/test_migrate_state.py @@ -100,16 +100,12 @@ def test_idempotent_second_run_is_noop(git_repo: Path, capsys) -> None: pdir = _project_dir(git_repo) migrate_state.main(["--repo", str(git_repo), "--auto"]) capsys.readouterr() # discard first-run output - mtimes_before = { - p: p.stat().st_mtime for p in pdir.rglob("*") if p.is_file() - } + mtimes_before = {p: p.stat().st_mtime for p in pdir.rglob("*") if p.is_file()} rc = migrate_state.main(["--repo", str(git_repo), "--auto"]) assert rc == 0 out = capsys.readouterr().out assert "Nothing to migrate." in out - mtimes_after = { - p: p.stat().st_mtime for p in pdir.rglob("*") if p.is_file() - } + mtimes_after = {p: p.stat().st_mtime for p in pdir.rglob("*") if p.is_file()} assert mtimes_after == mtimes_before, "second run touched destination files" diff --git a/tests/test_run_config_wizard.py b/tests/test_run_config_wizard.py index c04583f5..7a693aa7 100644 --- a/tests/test_run_config_wizard.py +++ b/tests/test_run_config_wizard.py @@ -73,9 +73,7 @@ def ask(self): return self._value -def _install_fake_questionary( - monkeypatch, script: dict[str, object], captured: list[dict] -) -> None: +def _install_fake_questionary(monkeypatch, script: dict[str, object], captured: list[dict]) -> None: """Stub `questionary` so the wizard's interactive prompts return scripted values and record what they were asked.""" import types @@ -116,9 +114,7 @@ def test_editor_reads_from_both_files(git_repo: Path, tmp_path: Path, monkeypatc "mode: team\nteam:\n backend: google_drive\n folder_id: x\n", encoding="utf-8", ) - operator_path.write_text( - "guided: true\ntelemetry: false\nchannel: stable\n", encoding="utf-8" - ) + operator_path.write_text("guided: true\ntelemetry: false\nchannel: stable\n", encoding="utf-8") captured: list[dict] = [] # Empty script → every prompt returns the displayed default → no edits. @@ -157,9 +153,7 @@ def test_editor_writes_to_routed_file(git_repo: Path, tmp_path: Path, monkeypatc encoding="utf-8", ) operator_path.parent.mkdir(parents=True, exist_ok=True) - operator_path.write_text( - "guided: true\ntelemetry: false\nchannel: stable\n", encoding="utf-8" - ) + operator_path.write_text("guided: true\ntelemetry: false\nchannel: stable\n", encoding="utf-8") captured: list[dict] = [] _install_fake_questionary( diff --git a/tests/test_session_end_queue_writer.py b/tests/test_session_end_queue_writer.py index 35fad24d..9c7ee43d 100644 --- a/tests/test_session_end_queue_writer.py +++ b/tests/test_session_end_queue_writer.py @@ -159,6 +159,7 @@ def test_archive_processed_moves_pending_to_processed(tmp_path: Path) -> None: assert dst.name == "sess1.jsonl" assert dst.read_text(encoding="utf-8") == "payload-A" from events.transcript_queue import _processed_root + assert dst.parent == _processed_root(str(repo)) diff --git a/tests/test_setup_wizard_git_native.py b/tests/test_setup_wizard_git_native.py index 02ff051e..7186a276 100644 --- a/tests/test_setup_wizard_git_native.py +++ b/tests/test_setup_wizard_git_native.py @@ -97,9 +97,7 @@ def _explode(*a, **kw): # would only fire if the prompt path is taken monkeypatch.setattr(setup_wizard, "_install_for_agent", lambda *a, **kw: None) monkeypatch.setattr(setup_wizard, "_install_skills", lambda *a, **kw: 0) monkeypatch.setattr(setup_wizard, "_install_claude_hooks", lambda *a, **kw: False) - monkeypatch.setattr( - setup_wizard, "_install_user_permissions_allowlist", lambda *a, **kw: False - ) + monkeypatch.setattr(setup_wizard, "_install_user_permissions_allowlist", lambda *a, **kw: False) rc = setup_wizard.run_setup(repo_hint=str(git_repo)) assert rc == 0 @@ -121,9 +119,7 @@ def _explode(*a, **kw): monkeypatch.setattr(setup_wizard, "_install_for_agent", lambda *a, **kw: None) monkeypatch.setattr(setup_wizard, "_install_skills", lambda *a, **kw: 0) monkeypatch.setattr(setup_wizard, "_install_claude_hooks", lambda *a, **kw: False) - monkeypatch.setattr( - setup_wizard, "_install_user_permissions_allowlist", lambda *a, **kw: False - ) + monkeypatch.setattr(setup_wizard, "_install_user_permissions_allowlist", lambda *a, **kw: False) rc = setup_wizard.run_setup(repo_hint=str(git_repo)) assert rc == 0