From 836b858ac1014fe53d9e0a343bdf53fc3e9e0521 Mon Sep 17 00:00:00 2001 From: jinhongkuan Date: Fri, 8 May 2026 22:51:06 -0700 Subject: [PATCH 1/3] =?UTF-8?q?feat(team-mode):=20remote=20event-log=20ada?= =?UTF-8?q?pter=20=E2=80=94=20Drive=20+=20LocalFolder=20backends=20(#277)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #277. Implements v0 Productization §2: shifts team mode entirely off git as the inter-machine replication substrate, onto a pluggable backend with two ship-day implementations (LocalFolder, GoogleDrive). Pull-only sync; no daemons, no webhooks, no Bicameral server in the loop. What changes for users - Setup wizard team-mode branch now offers Create vs Join vs LocalFolder. Create: provisions a Drive folder under the operator's Google account, prints the literal share-text-to-teammates message. Join: paste folder ID/URL, OAuth, verify access (404 / read-only both block), confirm the resolved signer (default-No) before persisting. LocalFolder: single prompt for the path. - Drive integration uses Bicameral's bundled OAuth client (the same pattern gh / gcloud / cursor use). Scope: drive.file only — Bicameral's CLI can only see files it creates inside the team folder. Token cache at ~/.bicameral/google-drive-token.json mode 0600. - Colored security disclosure renders before the browser opens, walking the operator through what flows where, what we do and don't see, and the trust dependency. Mirrored on bicameral-ai.com/privacy (BicameralAI/bicameral PR #111). Architecture - events/backends/__init__.py — BackendAdapter ABC + get_backend factory. - events/backends/local_folder.py — sha256-idempotent LocalFolderAdapter. - events/backends/google_drive.py — Drive Files API adapter; bundled client_id + client_secret (RFC 8252 native-app pattern, no env override per Option A); FolderNotFoundError / ReadOnlyAccessError surface for Join verify_access; create_folder helper for Create branch. - events/team_adapter.py — TeamWriteAdapter accepts backend=, marks _dirty on every write, exposes flush_to_backend(). - adapters/ledger.py — _read_collaboration_mode refactored to _read_team_config(repo_path) -> dict; constructs backend and injects into TeamWriteAdapter. - handlers/sync_middleware.py — ensure_team_synced (30 s TTL pull) + flush_team_writes (post-handler push); errors swallowed at DEBUG. - server.py — wires both into the dispatch site (pull at top, flush in finally). - setup_wizard.py — Create/Join/LocalFolder dispatch + colored security disclosure + identity-confirmation prompt at Join time. Testing - 53 new tests, 1 platform-skip (Windows-only path): - LocalFolderAdapter: 6 tests (push idempotency, pull peer-files-only, list_peers, lock serialization) - TeamWriteAdapter ↔ backend: 3 tests (connect-pulls-then-replays, write-marks-dirty-then-flush-pushes, no-backend-noop) - Two-author round-trip: 2 tests - Sync middleware: 5 tests (TTL cache, no-backend-noop, error swallowing) - GoogleDriveAdapter: 11 tests (push idempotency on md5, pull own-file-skip + max-modifiedTime token, lock create-then-delete + cleanup on exception, verify_access 404 / read-only / can-edit, create_folder, placeholder-detection auto-skip when bundled client is published) - Setup wizard Create/Join: 11 tests including identity decline, OAuth-disclosure decline, folder-id URL extraction, unwritable-path rejection - All adjacent regression tests still pass (test_team_event_replay, test_event_writer). - Lint clean across events/ adapters/ handlers/sync_middleware.py setup_wizard.py + new test files. Security model (also documented at docs/team-mode-setup.md and on bicameral-ai.com/privacy) - Decision data flows your-CLI ↔ Google directly. Bicameral the company does NOT receive copies. No Bicameral server in the loop. - drive.file scope limits the CLI on the user's machine to files it creates in the team folder. The rest of the user's Drive is invisible to the CLI; Google enforces this server-side. - As OAuth app publisher, Bicameral receives aggregate API request counts and per-user OAuth consent records (which Google accounts authenticated, when). Not contents. - Trust dependency: same as any OAuth tool (gh, gcloud, Notion, Slack desktop) — open-source CLI behaves as advertised, mitigated by source visibility. OAuth verification submission text + GCP setup checklist: docs/google-oauth-verification-submission.md. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 4 + README.md | 43 +- adapters/ledger.py | 42 +- docs/google-oauth-verification-submission.md | 167 +++++++ docs/team-mode-setup.md | 201 ++++++++ events/backends/__init__.py | 65 +++ events/backends/google_drive.py | 319 +++++++++++++ events/backends/local_folder.py | 72 +++ events/team_adapter.py | 33 +- handlers/sync_middleware.py | 49 ++ pyproject.toml | 3 + requirements.txt | 4 + server.py | 10 +- setup_wizard.py | 236 ++++++++- tests/test_backends_google_drive_unit.py | 257 ++++++++++ tests/test_backends_local_folder.py | 115 +++++ tests/test_setup_wizard_team_backend.py | 237 +++++++++ tests/test_sync_middleware_team.py | 121 +++++ tests/test_team_adapter_with_backend.py | 138 ++++++ tests/test_team_round_trip_local_folder.py | 106 +++++ .../2026-05-08-remote-event-log-adapter.md | 448 ++++++++++++++++++ 21 files changed, 2651 insertions(+), 19 deletions(-) create mode 100644 docs/google-oauth-verification-submission.md create mode 100644 docs/team-mode-setup.md create mode 100644 events/backends/__init__.py create mode 100644 events/backends/google_drive.py create mode 100644 events/backends/local_folder.py create mode 100644 tests/test_backends_google_drive_unit.py create mode 100644 tests/test_backends_local_folder.py create mode 100644 tests/test_setup_wizard_team_backend.py create mode 100644 tests/test_sync_middleware_team.py create mode 100644 tests/test_team_adapter_with_backend.py create mode 100644 tests/test_team_round_trip_local_folder.py create mode 100644 thoughts/shared/plans/2026-05-08-remote-event-log-adapter.md diff --git a/CHANGELOG.md b/CHANGELOG.md index eac70ea1..b7f133f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ All notable changes to bicameral-mcp are tracked here. Format loosely follows ## [Unreleased] +### Added + +- **Team-mode remote event-log adapter (#277, v0 §2 productization).** Team mode now replicates decisions across teammates' machines via a pluggable `BackendAdapter` (`events/backends/__init__.py`), with two ship-day backends: `LocalFolderAdapter` (shared filesystem — NFS/Dropbox/syncthing) and `GoogleDriveAdapter` (per-team folder with a bundled OAuth client). Pull-only sync — no daemons, no webhooks, no Bicameral server in the loop. `TeamWriteAdapter` gains `flush_to_backend()`; `handlers/sync_middleware.py` adds `ensure_team_synced` (30 s TTL pull) + `flush_team_writes` (post-handler push), wired into `server.py` dispatch (pull at top, flush in `finally`). Setup wizard splits team-mode into Create-vs-Join branches: Create makes the Drive folder + prints the literal share-text-to-teammates message; Join verifies access (404/read-only both block) and confirms the operator's resolved signer (default-No) before persisting. Drive scope narrowed to `drive.file` — Bicameral's CLI can only see files it created in the team folder. Colored security disclosure renders before browser open; mirrors the bicameral-ai.com/privacy page. Operator walkthrough at `docs/team-mode-setup.md`; OAuth verification submission text at `docs/google-oauth-verification-submission.md`. 53 new tests (Phase 1 LocalFolder + sync_middleware + TeamWriteAdapter wiring + two-author round-trip; Phase 2 GoogleDrive unit tests with mocked Drive client; Phase 3 wizard Create/Join/LocalFolder branches). + ### Fixed - **`handlers/bind.py`: caller-supplied line range cannot bypass symbol verification (#280, M2 grounding precision regression).** Pre-fix, when a caller supplied `start_line`/`end_line` alongside `symbol_name`, the handler verified only that the file existed at the SHA and accepted any `symbol_name` — silent corruption surface for caller-LLM grounding when the agent hallucinated a wrong symbol on a real file. Branch B now also calls `resolve_symbol_lines` (same tree-sitter path Branch A uses) and rejects two cases: (1) `symbol_name` doesn't resolve at all → `error="symbol '...' not found in at — caller-supplied line range cannot bypass symbol verification (#280)"`; (2) symbol resolves but the caller-supplied span doesn't overlap the resolved span → `error="span mismatch (#280)"`. Overlap (not exact equality) is the matching rule, so legitimate sub-region binds stay accepted; only hallucinated ranges are rejected. diff --git a/README.md b/README.md index 53833edf..1a2775af 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,45 @@ bicameral-mcp --smoke-test --- +## Solo or Team mode + +Bicameral runs in two modes; the setup wizard asks you which one at install time. + +| | Solo | Team | +|---|---|---| +| **Best for** | Individual builders; small projects with one decision-maker | Multiple people writing decisions for the same codebase (PMs, designers, multiple engineers) | +| **Where decisions live** | Local SurrealDB at `.bicameral/ledger.db`; not shared | One append-only `.jsonl` per teammate; replicated via a remote substrate you provision | +| **Who can ingest** | You | Anyone on the shared substrate (PM ingests a PRD, dev pulls and surfaces it on `preflight`) | +| **What gets shared** | Nothing leaves your machine | Decision payloads, canonical IDs, signoffs. **No source code** | +| **How replication works** | N/A | Pull-only on tool invocation (~30 s freshness). No daemons, no webhooks, no central server | +| **Failure if remote is down** | N/A | Falls back to local; resync next call. No blocking | + +### Team-mode remote substrate + +Team mode replicates events through a substrate **you provision and own** — +nothing routes through a Bicameral-operated server, and your decision data +never crosses our infrastructure. The wizard supports two backends: + +| Backend | Substrate | Setup | +|---|---|---| +| **Google Drive** (default) | A folder in your team's Google account. Each teammate writes to their own `.jsonl`; everyone reads the rest. | 3-minute one-time OAuth client setup, then Create-or-Join in the wizard. | +| **Local folder** (advanced) | A directory mounted on every teammate's machine (NFS, Dropbox, syncthing). | One prompt for the path. | + +S3, Dropbox-native, and Box backends are on the roadmap but not yet shipped — we +deliberately ship Drive first, validate the model with paying teams, then extend. + +The Drive integration is scoped to `drive.file` — the Bicameral CLI on your +machine can only touch files it creates inside the team folder; the rest of your +Drive (other folders, Google Docs, shared files) is invisible to the CLI. +Decision data flows your-CLI ↔ Google directly; **Bicameral the company does +not receive copies of your files**. We do see aggregate OAuth telemetry (API +request counts, OAuth consent records — not contents) as the OAuth app +publisher, the same way any OAuth-using tool's vendor does. Token cache lives +at `~/.bicameral/google-drive-token.json`, mode 0600. Full security posture +and operator walkthrough: [`docs/team-mode-setup.md`](docs/team-mode-setup.md). + +--- + ## Slash Commands After setup, Claude Code gets these slash commands: @@ -95,8 +134,10 @@ The agent also fires these automatically — `preflight` before any code change, | File | What it is | |---|---| | `.mcp.json` | MCP server config for Claude Code | -| `.bicameral/config.yaml` | Mode (`solo`/`team`) and guided-mode flag | +| `.bicameral/config.yaml` | Mode (`solo`/`team`), guided-mode flag, and (in team mode) `team.backend` + `team.folder_id`/`team.remote_root` + `team.role` | | `.bicameral/ledger.db` | Local SurrealDB decision ledger (solo mode) | +| `.bicameral/events/.jsonl` | Append-only event log per teammate (team mode) | +| `~/.bicameral/google-drive-token.json` | Drive OAuth token cache, mode 0600 (team mode + Drive backend only) | | `.gitignore` entry | Ignores `.bicameral/` in solo mode | | `.claude/settings.json` | PostToolUse hook (auto-sync after commits) + SessionEnd hook (capture mid-session decisions) | | `.claude/skills/bicameral-*/SKILL.md` | Slash commands | diff --git a/adapters/ledger.py b/adapters/ledger.py index 71341c5b..5fff795f 100644 --- a/adapters/ledger.py +++ b/adapters/ledger.py @@ -22,31 +22,32 @@ _real_ledger_instance = None -def _read_collaboration_mode(repo_path: str) -> str: - """Read mode from .bicameral/config.yaml (returns 'solo' or 'team'). +def _read_team_config(repo_path: str) -> dict: + """Read .bicameral/config.yaml as a parsed dict. + Returns ``{"mode": "solo"}`` when the file is absent or unparseable. Checks BICAMERAL_DATA_PATH first so history stored in a private parent repo is discovered even when REPO_PATH points to a public submodule. """ data_path = os.getenv("BICAMERAL_DATA_PATH", repo_path) config_path = Path(data_path) / ".bicameral" / "config.yaml" if not config_path.exists(): - return "solo" + return {"mode": "solo"} try: import yaml - config = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} - return config.get("mode", "solo") + cfg = yaml.safe_load(config_path.read_text(encoding="utf-8")) + return cfg if isinstance(cfg, dict) else {"mode": "solo"} except Exception: - # yaml not installed or bad file — fall back to basic parsing + # yaml not installed or bad file — fall back to mode-only parse try: for line in config_path.read_text(encoding="utf-8").splitlines(): line = line.strip() if line.startswith("mode:"): - return line.split(":", 1)[1].strip().strip("\"'") + return {"mode": line.split(":", 1)[1].strip().strip("\"'")} except OSError: pass - return "solo" + return {"mode": "solo"} def get_ledger(): @@ -64,9 +65,11 @@ def get_ledger(): ) repo_path = os.getenv("REPO_PATH", ".") - mode = _read_collaboration_mode(repo_path) + cfg = _read_team_config(repo_path) + mode = cfg.get("mode", "solo") if mode == "team": + from events.backends import get_backend from events.materializer import EventMaterializer from events.team_adapter import TeamWriteAdapter from events.writer import EventFileWriter, _get_git_email @@ -83,8 +86,25 @@ def get_ledger(): writer = EventFileWriter(events_dir, author) materializer = EventMaterializer(events_dir, local_dir) - _real_ledger_instance = TeamWriteAdapter(inner, writer, materializer) - logger.info("[ledger] team mode — events at %s (author: %s)", events_dir, author) + cfg.setdefault("team", {})["author"] = author + try: + backend = get_backend(cfg) + except Exception as exc: + logger.warning( + "[ledger] team backend init failed (%s) — continuing local-only", exc + ) + backend = None + + _real_ledger_instance = TeamWriteAdapter( + inner, writer, materializer, backend=backend + ) + backend_kind = (cfg.get("team") or {}).get("backend") or "local-only" + logger.info( + "[ledger] team mode — events at %s (author: %s, backend: %s)", + events_dir, + author, + backend_kind, + ) else: _real_ledger_instance = inner diff --git a/docs/google-oauth-verification-submission.md b/docs/google-oauth-verification-submission.md new file mode 100644 index 00000000..0c279c78 --- /dev/null +++ b/docs/google-oauth-verification-submission.md @@ -0,0 +1,167 @@ +# Google OAuth verification submission — Bicameral MCP + +This is the operator-facing checklist + ready-to-paste text for getting +Bicameral's Drive OAuth client verified by Google. Run through this once +per project lifetime. Until verification clears, users see a "Google +hasn't verified this app" interstitial — they can click through (advanced +→ "go to Bicameral (unsafe)") but it scares non-technical users. + +## Prerequisites (already done) + +- [x] GCP project `bicameral-mcp` created +- [x] Linked to billing account `01647A-138E06-EE9A8B` +- [x] Drive API enabled + +## Web-console steps + +### 1. OAuth consent screen + +URL: https://console.cloud.google.com/apis/credentials/consent?project=bicameral-mcp + +| Field | Value | +|---|---| +| User Type | **External** | +| App name | `Bicameral` | +| User support email | `support@bicameral-ai.com` (or `jin@bicameral-ai.com` while support@ isn't set up) | +| App logo | Upload `assets/bicameral-icon-120.png` (must be 120×120 PNG, under 1 MB). If we don't have one yet, leave blank — Google won't block submission, but verified apps look more legitimate with a logo | +| Application home page | `https://bicameral-ai.com` | +| Application privacy policy | `https://bicameral-ai.com/privacy` | +| Application terms of service | `https://bicameral-ai.com/terms` | +| Authorized domains | `bicameral-ai.com` | +| Developer contact email | `jin@bicameral-ai.com` | + +### 2. Scopes + +Add **only** this one scope: + +- `https://www.googleapis.com/auth/drive.file` — "View and manage Google Drive files and folders that you have opened or created with this app." + +> Critical: do NOT add `drive`, `drive.readonly`, `drive.metadata`, or any +> other Drive scope. `drive.file` is non-sensitive — it skips the "restricted +> scope" review path and is much faster to verify (days vs weeks). + +### 3. Test users (during dev / before verification) + +Add yourself + any teammates running the unverified flow: + +- `jin@bicameral-ai.com` +- (anyone else from the bicameral-ai.com domain) + +Once verified, this list is unused — anyone with a Google account can +authenticate. + +### 4. OAuth client (Desktop app) + +URL: https://console.cloud.google.com/apis/credentials?project=bicameral-mcp + +- Click **Create Credentials → OAuth client ID**. +- Application type: **Desktop app**. +- Name: `Bicameral CLI` (Google-internal label, not user-visible). +- Click Create. Download the JSON. + +Open the JSON. Copy `client_id` and `client_secret` into +`events/backends/google_drive.py` — replace these constants: + +```python +_BUNDLED_CLIENT_ID = "REPLACE_WITH_BICAMERAL_DRIVE_OAUTH_CLIENT_ID.apps.googleusercontent.com" +_BUNDLED_CLIENT_SECRET = "REPLACE_WITH_BICAMERAL_DRIVE_OAUTH_CLIENT_SECRET" +``` + +Commit. Push. Cut a release. + +### 5. Verification submission + +Required because we're publishing externally and want the consent screen +without the unverified-app warning. URL: +https://console.cloud.google.com/apis/credentials/consent?project=bicameral-mcp +→ click **Publish App** → submission form opens. + +#### Verification justification — paste this + +**App functionality** + +> Bicameral is an open-source MCP (Model Context Protocol) server for AI +> coding assistants. It maintains a local decision ledger that maps +> meeting decisions to code regions. In team mode, the CLI uses Google +> Drive as a pull-only replication substrate so teammates' decision logs +> sync between machines without operating a central server. + +**Why each scope is needed** + +> `drive.file` — Bicameral creates one append-only JSONL event log per +> teammate (`.jsonl`) inside a single shared folder the team +> creates. Each user's CLI reads peer files (created by other Bicameral +> instances within the same folder) and writes their own. Bicameral never +> needs access to the user's other Drive files, so the narrow `drive.file` +> scope is sufficient. + +**Demo video script (2-3 minutes; record once)** + +1. Open Bicameral landing page (`https://bicameral-ai.com`). +2. Show terminal: `bicameral-mcp setup`. +3. When wizard asks "How do you want to set up the shared ledger?", select + "Create a new shared ledger". +4. Cut to the colored security disclosure the wizard prints — pause on + screen for 3 seconds so reviewers see it. +5. Browser opens, OAuth consent screen appears. Click Allow. +6. Cut back to terminal — wizard prints folder ID, instructions to share + with teammates. +7. Open Drive in another tab — show the new `bicameral--ledger` + folder. Show the empty folder. +8. In the terminal, ingest a small decision (e.g. `bicameral-mcp ingest + --text "We decided to ship pull-only sync"`). Show that it succeeded. +9. Cut to Drive — refresh the folder. Show the new `.jsonl` file + (one event line). Open it briefly to show the JSON event structure. +10. Cut to the privacy policy page in the browser — pause on the section + that explains what Bicameral can and cannot see. + +Upload the unlisted YouTube video link to the verification form. + +**Privacy policy must include** + +> Bicameral collects no personal data through the Google Drive +> integration. The Drive OAuth flow grants Bicameral the `drive.file` +> scope, which permits the application to read and write only files it +> creates within the shared folder you provision. Bicameral does not +> upload, transmit, or store user data on any Bicameral-operated server; +> all decision data is replicated peer-to-peer through your team's own +> Google Drive folder. As the OAuth application owner, Bicameral receives +> aggregate API usage analytics from Google (e.g. request counts) and +> per-user OAuth consent records (which Google accounts authenticated +> against the Bicameral app). We do not link this telemetry to any +> identifying information beyond the Google account that authenticated +> and we do not share it with third parties. + +(Adapt to fit the rest of the bicameral-ai.com privacy policy structure.) + +#### Submit + +Click **Submit for verification**. Google replies in 1-2 days for +non-sensitive `drive.file` scope; sometimes longer if they ask follow-up +questions. Reply promptly — slow operator response is the #1 reason +verification stalls. + +## After verification + +- Replace the placeholder constants in `events/backends/google_drive.py` + with the published `client_id` + `client_secret` (if not already done in + step 4). +- Update `tests/test_backends_google_drive_unit.py::test_bundled_client_config_raises_when_placeholders_present` + — it'll auto-skip once placeholders are gone, but you can also rewrite + it to assert the published config dict is well-formed. +- Remove the "Provision an OAuth client" section from + `docs/team-mode-setup.md` (already done — that section was removed when + we pivoted from operator-supplied to bundled client). +- Cut a release; users get the 1-click flow with no unverified-app + warning. + +## If verification gets denied + +Most common reasons + fixes: + +| Reason | Fix | +|---|---| +| Privacy policy doesn't mention Google scopes | Add the boilerplate paragraph above | +| Demo video doesn't show the OAuth flow | Re-record with the consent screen visible | +| Application home page doesn't link to OAuth-using product | Make sure the landing page mentions the team-mode feature | +| Trademark concern with "Bicameral" | Provide trademark documentation if challenged (we own the domain) | diff --git a/docs/team-mode-setup.md b/docs/team-mode-setup.md new file mode 100644 index 00000000..7fd44c31 --- /dev/null +++ b/docs/team-mode-setup.md @@ -0,0 +1,201 @@ +# Team mode setup + +Team mode replicates your decision ledger across teammates' machines so a +PM can ingest a PRD and your engineers see those decisions in their next +`bicameral.preflight` call. Same pull-only, append-only event-log model as +solo mode — just with a shared remote substrate the wizard provisions for +you. + +This page walks you through setup end-to-end, covering both backends, the +OAuth client provisioning step, the security posture, and verification. + +## Backends + +| Backend | When to use | What you provision | +|---|---|---| +| `google_drive` | Default. Anyone with a Google account, no shared filesystem required. | A Google Drive folder + an OAuth client (3 minutes, one-time per machine). | +| `local_folder` | Advanced. Your team already has a shared filesystem (NFS, Dropbox, syncthing). | A directory path everyone has mounted. | + +Both backends carry the same wire shape: one append-only `.jsonl` +file per teammate, written-by-author / read-by-everyone, deduplicated at +the database layer via canonical IDs. There is no central server. + +## Create vs Join + +Team-mode setup branches into two flows: + +- **Create** — you're the first teammate. The wizard creates a Drive folder + on your account, prints the folder ID, and tells you the literal text to + send your teammates. You become the *founding member* (recorded in + `config.yaml`); the Drive folder ACL is governed by your Google account. +- **Join** — a teammate has already created the shared folder. They send + you the folder ID; you paste it; the wizard verifies your access (404 → + not shared yet, read-only → ask for Editor) and confirms how you'll + appear in the ledger before persisting. + +For LocalFolder there is no Create vs Join distinction — filesystem ACLs +on the shared directory determine who's in the team. + +## OAuth — what happens, what we see + +Bicameral ships with its own Google OAuth client (the same pattern `gh`, +`gcloud`, and `cursor` use). When you run `bicameral-mcp setup` and pick +Create or Join, the wizard prints a colored security disclosure **before** +opening your browser, then triggers the standard Google consent flow on +`localhost`. Click Allow once; you're done. + +### What flows where + +- **Decision data (transcripts, payloads)** flows your-CLI ↔ Google + directly. Bicameral the company does NOT receive copies. No Bicameral + server is in the loop. +- **Your OAuth token** lives at `~/.bicameral/google-drive-token.json`, + mode 0600, on your machine. + +### What the `drive.file` scope means for the rest of your Drive + +The Bicameral CLI on your machine can only touch files it creates in the +team folder. Your other Drive content (other folders, Google Docs, shared +files) is invisible to the CLI — Google enforces this server-side. This +is the protection the `drive.file` scope is designed to give. + +### What Bicameral the company CAN see (as the OAuth app publisher) + +As the publisher of the OAuth client, we receive limited telemetry from +Google's OAuth dashboard: + +- **Aggregate API request counts.** Not contents. ("5,000 Drive API + calls last week from this OAuth client.") +- **OAuth consent records.** Which Google accounts authenticated against + the Bicameral app, and when. + +We do NOT receive: file contents, file names, folder names, folder IDs, +team membership, or who is collaborating with whom on what. + +### The trust dependency you're accepting + +The OAuth flow itself can't leak file contents to us — your token stays +on your machine, and Drive API calls bypass our infrastructure entirely. +The realistic threat is *"what if Bicameral pushed a malicious CLI +update that read transcripts and POSTed them to bicameral-ai.com?"* — +the same trust dependency you accept with any OAuth tool you install +(`gh`, `gcloud`, Notion, Slack desktop, Cursor, etc.). The mitigation is +that Bicameral is open source: any exfiltration code would be visible in +the diff. Source: +[github.com/BicameralAI/bicameral-mcp](https://github.com/BicameralAI/bicameral-mcp). + +## Run setup — Create flow + +``` +$ bicameral-mcp setup + … +? Collaboration mode: Team — decisions shared via git (append-only event files) +? How do you want to set up the shared ledger? + ❯ Create a new shared ledger (you become the founding member) + Join an existing shared ledger (paste a folder ID from a teammate) + Use a shared filesystem instead (NFS, Dropbox, syncthing) — advanced + + [browser opens for Google OAuth, you grant Drive access] + + Created shared ledger folder: bicameral-myrepo-ledger + Folder ID: 1AbCdEfGhIjKl_mNoPqRsTuVwXyZ-abcd + URL: https://drive.google.com/drive/folders/1AbCdEfGhIjKl_mNoPqRsTuVwXyZ-abcd + + Send this to your teammates so they can Join: + "Share this folder with my teammate as Editor, then run `bicameral + setup` and paste this folder ID: 1AbCdEfGhIjKl_mNoPqRsTuVwXyZ-abcd" +``` + +After Create completes, share the Drive folder with each teammate as +**Editor** in the Drive UI — the wizard does not auto-share, because your +Google account governs the ACL. + +## Run setup — Join flow + +``` +$ bicameral-mcp setup + … +? How do you want to set up the shared ledger? + Create a new shared ledger + ❯ Join an existing shared ledger (paste a folder ID from a teammate) + Use a shared filesystem (advanced) + +? Paste the shared ledger folder ID (or full Drive URL) from your teammate: + 1AbCdEfGhIjKl_mNoPqRsTuVwXyZ-abcd + + [browser opens for Google OAuth] + [verify_access checks the folder is shared and writable] + +? You'll appear in the team ledger as `alice`. Continue? [y/N] + y +``` + +The identity confirmation defaults to **No**. If the resolved signer +(governed by `signer_email_fallback` in `config.yaml`) doesn't match how +you want to appear in the team ledger, decline, edit your config, and +re-run. + +## Verifying replication + +1. On machine A: `bicameral.ingest` a small decision (e.g. one line of + meeting notes). +2. Wait up to 30 seconds (the in-process pull TTL). +3. On machine B: run any tool (`bicameral.history` is convenient). The + decision should appear. + +If it doesn't: + +- Check `tail -F ~/.bicameral/local/bicameral.log` on B for `[gdrive]` or + `[sync_middleware]` warnings. +- Confirm B's account has Editor on the Drive folder. +- The 30 s TTL is a per-process cache; restarting the MCP server clears it. + +## Permissions and revocation + +| Action | Effect | +|---|---| +| Founding member shares folder as Editor | Teammate can Create + read peer files. | +| Founding member shares folder as Reader | Join wizard fails with `ReadOnlyAccessError` (won't persist config). | +| Founding member revokes a teammate's access | Their next `push_events` call silently fails (DEBUG-logged); their existing `.jsonl` in the folder remains until manually deleted. Their local DB still has every decision they ever ingested or pulled — event logs are append-only and durable. | + +## Privacy posture + +- **Token cache.** `~/.bicameral/google-drive-token.json`, mode 0600 on + POSIX. Contains a refresh token; treat it like an SSH key. +- **OAuth scope.** `https://www.googleapis.com/auth/drive.file` only — + Bicameral can read/write only files it created or that you opened + through Bicameral. Other Drive content is invisible. +- **Author identity in the ledger.** Each event's `author` field is your + resolved signer (governed by `signer_email_fallback` in `config.yaml`): + - `redact` — ``, no attribution. + - `local-part-only` (default) — the part before `@`. Privacy-positive. + - `full` — verbatim email. Opt-in. +- **What's in the JSONL.** Decision payloads, canonical IDs, signoffs, + region descriptors. No source code is uploaded. + +## Local-folder backend + +Single prompt, no OAuth. Pick a path everyone has mounted (NFS, +Dropbox, syncthing, etc.): + +``` +? How do you want to set up the shared ledger? + ❯ Use a shared filesystem (advanced) + +? Path to the shared folder (must exist on every teammate's machine): + /Volumes/team-share/bicameral-myrepo +``` + +Filesystem ACLs determine team membership. Same per-author JSONL layout, +same TTL-cached pull on tool dispatch. + +## Troubleshooting + +| Symptom | Likely cause | Fix | +|---|---|---| +| Join wizard exits with `Drive folder ... not found` | Folder ID typo OR founding member hasn't shared yet | Confirm ID; ask founding member to share as Editor. | +| Join wizard exits with `read-only for this account` | You were granted Viewer, not Editor | Ask founding member to upgrade your role in the Drive UI. | +| `OAuthClientNotProvisionedError` on first run | You're running a dev build before the bundled OAuth client was published — file an issue. | Wait for the next release, or build from a tag with the published client. | +| "Google hasn't verified this app" warning | Bicameral's OAuth app verification is still pending with Google. | Click "Advanced" → "Go to Bicameral (unsafe)" — the app is published; verification badge clears once Google completes review. | +| Peer events don't appear after 30 s | Pull cache TTL OR backend silently failing | Check `~/.bicameral/local/bicameral.log` for `[sync_middleware] team pull failed` lines. | +| OAuth refresh fails | Cached refresh token expired/revoked | Delete `~/.bicameral/google-drive-token.json` and re-run setup. | diff --git a/events/backends/__init__.py b/events/backends/__init__.py new file mode 100644 index 00000000..30839281 --- /dev/null +++ b/events/backends/__init__.py @@ -0,0 +1,65 @@ +"""Pluggable event-log transport backends (#277). + +Each backend moves per-author JSONL files between local cache and a +shared remote root. Backends know nothing about JSONL contents — pure +file transport. The remote root is a flat namespace of +``.jsonl`` files (one per peer) plus optional +``.lock`` sentinels. + +Pull-only sync model: no daemons, no webhooks, no background polling. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from collections.abc import AsyncIterator +from contextlib import AbstractAsyncContextManager +from pathlib import Path + + +class BackendAdapter(ABC): + """Move per-author event files between local cache and a remote root.""" + + @abstractmethod + async def push_events(self, local_path: Path, remote_name: str) -> None: + """Upload ``local_path`` to ``/``. + + Idempotent: skip when remote hash matches local. + """ + + @abstractmethod + async def pull_events(self, local_dir: Path, since_token: str | None) -> str: + """Download every peer's JSONL into ``local_dir``. + + Skips the caller's own file (their local copy is authoritative). + Returns an opaque token the caller passes back next time to enable + since-cursor optimization (backends free to ignore and return ""). + Idempotent. + """ + + @abstractmethod + def lock(self, remote_name: str) -> AbstractAsyncContextManager[None]: + """Best-effort write lock. Caller handles races on its own.""" + + @abstractmethod + async def list_peers(self) -> AsyncIterator[str]: + """Yield ```` for every peer file in remote_root.""" + + +def get_backend(config: dict) -> BackendAdapter | None: + """Construct the configured backend, or None when not in use. + + Reads ``team.backend`` from the parsed config dict; supported values: + ``local_folder``, ``google_drive``. Anything else (including absent) + returns ``None`` — team mode then behaves as today (local-only events). + """ + team = config.get("team") or {} + kind = team.get("backend") + author = team.get("author", "") + if kind == "local_folder": + from .local_folder import LocalFolderAdapter + return LocalFolderAdapter(remote_root=Path(team["remote_root"]), author=author) + if kind == "google_drive": + from .google_drive import GoogleDriveAdapter + return GoogleDriveAdapter(folder_id=team["folder_id"], author=author) + return None diff --git a/events/backends/google_drive.py b/events/backends/google_drive.py new file mode 100644 index 00000000..d3c24ea0 --- /dev/null +++ b/events/backends/google_drive.py @@ -0,0 +1,319 @@ +"""GoogleDriveAdapter — BackendAdapter against Google Drive Files API (#277). + +Security posture: + + * Bundled OAuth client. We ship Bicameral's own ``client_id`` + + ``client_secret`` for the desktop-app OAuth client. Per RFC 8252, the + secret in installed apps is NOT confidential — it's a shared identifier, + not auth credential, exactly like ``gh`` / ``gcloud`` / ``cursor``. The + user-facing security model is the consent screen + Google's verified-app + badge, not secret confidentiality. + * Drive scope: ``https://www.googleapis.com/auth/drive.file`` only — + Bicameral can only see files it created. Other Drive content stays + invisible. + * Token cache: ``~/.bicameral/google-drive-token.json`` written 0600. + +Pull-only sync model — no daemons, no webhooks. Caller drives push/pull +cadence (typically once per tool-call lifecycle). + +GCP / OAuth verification submission text lives at +``docs/google-oauth-verification-submission.md``. +""" + +from __future__ import annotations + +import asyncio +import hashlib +import logging +import os +from collections.abc import AsyncIterator +from contextlib import asynccontextmanager +from pathlib import Path + +logger = logging.getLogger(__name__) + +DRIVE_SCOPE = "https://www.googleapis.com/auth/drive.file" +DEFAULT_TOKEN_PATH = Path.home() / ".bicameral" / "google-drive-token.json" +FOLDER_MIMETYPE = "application/vnd.google-apps.folder" + +# Bundled OAuth client (#277). Replace these placeholders with the real values +# once the GCP project is provisioned. See docs/google-oauth-verification-submission.md +# for the verification process. +# +# Placeholder sentinel: detected at runtime so users get a clear error before +# Bicameral ships the real credentials, rather than an opaque OAuth failure. +_BUNDLED_CLIENT_ID = "734983128365-199hrimc908o5uam4kvgqgegrra5ta0j.apps.googleusercontent.com" +_BUNDLED_CLIENT_SECRET = "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" +_PLACEHOLDER_PREFIX = "REPLACE_WITH_" + + +class OAuthClientNotProvisionedError(RuntimeError): + """Bicameral's bundled Drive OAuth client hasn't been published yet. + + Raised when the source still carries the placeholder client_id/secret. + Once Jin provisions the GCP project and replaces the constants, this + error becomes unreachable. + """ + + +class FolderNotFoundError(RuntimeError): + """The configured Drive folder ID does not exist or is not shared with us.""" + + +class ReadOnlyAccessError(RuntimeError): + """The Drive folder is shared with us but we lack Editor access.""" + + +def _md5_bytes(b: bytes) -> str: + return hashlib.md5(b).hexdigest() + + +def _md5_file(path: Path) -> str: + h = hashlib.md5() + with open(path, "rb") as f: + for chunk in iter(lambda: f.read(65536), b""): + h.update(chunk) + return h.hexdigest() + + +def _build_drive_service(creds): + """Construct the Drive v3 service. Stub seam for tests.""" + from googleapiclient.discovery import build # type: ignore[import-not-found] + + return build("drive", "v3", credentials=creds, cache_discovery=False) + + +def _bundled_client_config() -> dict: + """Return the bundled Bicameral OAuth client config. + + Raises ``OAuthClientNotProvisionedError`` when the placeholder constants + are still in source — this happens during local dev before the GCP + project is provisioned. Once Jin replaces the constants, this branch + becomes unreachable. + """ + if ( + _BUNDLED_CLIENT_ID.startswith(_PLACEHOLDER_PREFIX) + or _BUNDLED_CLIENT_SECRET.startswith(_PLACEHOLDER_PREFIX) + ): + raise OAuthClientNotProvisionedError( + "Bicameral's Google Drive OAuth client isn't published yet. " + "If you're a Bicameral developer, see " + "docs/google-oauth-verification-submission.md for the GCP setup. " + "If you're a user seeing this error, please file an issue — " + "you got here ahead of the official release." + ) + return { + "installed": { + "client_id": _BUNDLED_CLIENT_ID, + "client_secret": _BUNDLED_CLIENT_SECRET, + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "redirect_uris": ["http://localhost"], + } + } + + +class GoogleDriveAdapter: + """BackendAdapter against a single Google Drive folder.""" + + def __init__( + self, + folder_id: str | None, + author: str, + token_path: Path | None = None, + ) -> None: + self._folder_id = folder_id + self._author = author + self._token_path = token_path or DEFAULT_TOKEN_PATH + self._service = None + self._service_lock = asyncio.Lock() + + # ── OAuth ──────────────────────────────────────────────────────────── + + def _credentials(self): + """Resolve cached or freshly-minted user credentials. + + Loads token from ``self._token_path`` if present and valid; refreshes + on expiry; otherwise launches the local-loopback OAuth flow. + Persists the resulting token at mode 0600. + """ + from google.auth.transport.requests import Request # type: ignore[import-not-found] + from google.oauth2.credentials import Credentials # type: ignore[import-not-found] + from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore[import-not-found] + + creds = None + if self._token_path.exists(): + creds = Credentials.from_authorized_user_file( + str(self._token_path), [DRIVE_SCOPE] + ) + if creds and creds.valid: + return creds + if creds and creds.expired and creds.refresh_token: + creds.refresh(Request()) + else: + client_config = _bundled_client_config() + flow = InstalledAppFlow.from_client_config(client_config, [DRIVE_SCOPE]) + creds = flow.run_local_server(port=0) + + self._token_path.parent.mkdir(parents=True, exist_ok=True) + self._token_path.write_text(creds.to_json(), encoding="utf-8") + try: + os.chmod(self._token_path, 0o600) + except OSError: + pass # Windows / non-POSIX + return creds + + async def _service_ready(self): + async with self._service_lock: + if self._service is None: + self._service = _build_drive_service(self._credentials()) + return self._service + + # ── Push / pull / list / lock ─────────────────────────────────────── + + async def push_events(self, local_path: Path, remote_name: str) -> None: + from googleapiclient.http import MediaFileUpload # type: ignore[import-not-found] + + svc = await self._service_ready() + existing = ( + svc.files() + .list( + q=f"'{self._folder_id}' in parents and name='{remote_name}' and trashed=false", + fields="files(id, md5Checksum)", + pageSize=1, + ) + .execute() + .get("files", []) + ) + local_md5 = _md5_file(local_path) + if existing and existing[0].get("md5Checksum") == local_md5: + return + media = MediaFileUpload(str(local_path), mimetype="application/x-ndjson", resumable=False) + if existing: + svc.files().update(fileId=existing[0]["id"], media_body=media).execute() + else: + svc.files().create( + body={"name": remote_name, "parents": [self._folder_id]}, + media_body=media, + fields="id", + ).execute() + + async def pull_events(self, local_dir: Path, since_token: str | None) -> str: + svc = await self._service_ready() + local_dir.mkdir(parents=True, exist_ok=True) + own_name = f"{self._author}.jsonl" + + q_parts = [f"'{self._folder_id}' in parents", "trashed=false", "name contains '.jsonl'"] + if since_token: + q_parts.append(f"modifiedTime > '{since_token}'") + files = ( + svc.files() + .list( + q=" and ".join(q_parts), + fields="files(id, name, md5Checksum, modifiedTime)", + pageSize=1000, + ) + .execute() + .get("files", []) + ) + max_modified = since_token or "" + for f in files: + name = f.get("name", "") + if name == own_name or not name.endswith(".jsonl"): + continue + local_path = local_dir / name + local_md5 = _md5_file(local_path) if local_path.exists() else None + if local_md5 and local_md5 == f.get("md5Checksum"): + if f.get("modifiedTime", "") > max_modified: + max_modified = f["modifiedTime"] + continue + data = svc.files().get_media(fileId=f["id"]).execute() + local_path.write_bytes(data if isinstance(data, bytes) else bytes(data)) + if f.get("modifiedTime", "") > max_modified: + max_modified = f["modifiedTime"] + return max_modified + + @asynccontextmanager + async def lock(self, remote_name: str): + """Best-effort sentinel-file lock. No-blocking: caller handles races.""" + svc = await self._service_ready() + sentinel_name = f"{remote_name}.lock" + created = ( + svc.files() + .create( + body={"name": sentinel_name, "parents": [self._folder_id]}, + fields="id", + ) + .execute() + ) + lock_id = created.get("id") + try: + yield + finally: + try: + svc.files().delete(fileId=lock_id).execute() + except Exception as exc: + logger.warning("[gdrive] failed to release sentinel lock: %s", exc) + + async def list_peers(self) -> AsyncIterator[str]: + svc = await self._service_ready() + files = ( + svc.files() + .list( + q=f"'{self._folder_id}' in parents and name contains '.jsonl' and trashed=false", + fields="files(name)", + pageSize=1000, + ) + .execute() + .get("files", []) + ) + for f in files: + name = f.get("name", "") + if name.endswith(".jsonl"): + yield name[: -len(".jsonl")] + + # ── Helpers used by setup wizard (Phase 3) ────────────────────────── + + def create_folder(self, name: str) -> str: + """Create a new shared folder in the operator's Drive root. Returns ID.""" + # Synchronous: setup wizard runs outside the event loop. + creds = self._credentials() + svc = _build_drive_service(creds) + result = ( + svc.files() + .create( + body={"name": name, "mimeType": FOLDER_MIMETYPE}, + fields="id", + ) + .execute() + ) + return result["id"] + + def verify_access(self) -> None: + """Confirm we can list + write to the configured folder. Sync. + + Raises FolderNotFoundError on 404, ReadOnlyAccessError when + capabilities.canEdit is False. + """ + from googleapiclient.errors import HttpError # type: ignore[import-not-found] + + creds = self._credentials() + svc = _build_drive_service(creds) + try: + meta = ( + svc.files() + .get(fileId=self._folder_id, fields="id,capabilities") + .execute() + ) + except HttpError as exc: + if getattr(exc, "resp", None) is not None and exc.resp.status == 404: + raise FolderNotFoundError( + f"Drive folder {self._folder_id!r} not found. Check the ID, " + "or ask the founding member to share it with your Google account." + ) from exc + raise + if not meta.get("capabilities", {}).get("canEdit", False): + raise ReadOnlyAccessError( + f"Drive folder {self._folder_id!r} is read-only for this account. " + "Ask the founding member to grant Editor access." + ) diff --git a/events/backends/local_folder.py b/events/backends/local_folder.py new file mode 100644 index 00000000..90166e44 --- /dev/null +++ b/events/backends/local_folder.py @@ -0,0 +1,72 @@ +"""LocalFolderAdapter — BackendAdapter backed by a shared filesystem path (#277). + +Useful as an integration-test backend and as a fallback for orgs that already +share a synced folder (NFS, Dropbox, syncthing). Same wire shape as +GoogleDriveAdapter so the rest of the system is backend-agnostic. +""" + +from __future__ import annotations + +import asyncio +import hashlib +import shutil +from collections.abc import AsyncIterator +from contextlib import asynccontextmanager +from pathlib import Path + + +def _sha256_file(path: Path) -> str: + h = hashlib.sha256() + with open(path, "rb") as f: + for chunk in iter(lambda: f.read(65536), b""): + h.update(chunk) + return h.hexdigest() + + +class LocalFolderAdapter: + """Move per-author event files via a shared filesystem directory. + + Each author writes only to ``/.jsonl``. Pull + copies every peer's file (everything except the caller's own) into the + caller's local events_dir; copies are skipped when sha256 matches. + """ + + def __init__(self, remote_root: Path, author: str) -> None: + self._remote_root = Path(remote_root) + self._author = author + self._remote_root.mkdir(parents=True, exist_ok=True) + self._lock_locks: dict[str, asyncio.Lock] = {} + + async def push_events(self, local_path: Path, remote_name: str) -> None: + target = self._remote_root / remote_name + if target.exists() and _sha256_file(target) == _sha256_file(local_path): + return + shutil.copy2(local_path, target) + + async def pull_events(self, local_dir: Path, since_token: str | None) -> str: + local_dir.mkdir(parents=True, exist_ok=True) + own_name = f"{self._author}.jsonl" + for remote_path in self._remote_root.glob("*.jsonl"): + if remote_path.name == own_name: + continue + local_path = local_dir / remote_path.name + if local_path.exists() and _sha256_file(local_path) == _sha256_file(remote_path): + continue + shutil.copy2(remote_path, local_path) + return "" + + @asynccontextmanager + async def lock(self, remote_name: str): + """Best-effort serialization within this process via asyncio.Lock. + + Cross-process locking via fcntl/msvcrt is intentionally out of v0 + scope — same-author cross-machine writes are an edge case (the + per-author file model already serializes the common case). + """ + lock = self._lock_locks.setdefault(remote_name, asyncio.Lock()) + async with lock: + yield + + async def list_peers(self) -> AsyncIterator[str]: + for path in sorted(self._remote_root.glob("*.jsonl")): + yield path.stem diff --git a/events/team_adapter.py b/events/team_adapter.py index 0575c255..e5ba308d 100644 --- a/events/team_adapter.py +++ b/events/team_adapter.py @@ -25,20 +25,45 @@ def __init__( inner, writer: EventFileWriter, materializer: EventMaterializer, + backend=None, ) -> None: self._inner = inner self._writer = writer self._materializer = materializer + self._backend = backend + self._dirty = False self._ready = False async def connect(self) -> None: - """Connect inner adapter, then replay any new events from peers.""" + """Connect inner adapter, pull peer events from the backend (if any), + then replay everything new from disk.""" await self._inner.connect() + if self._backend is not None: + try: + await self._backend.pull_events( + self._writer.events_dir, since_token=None + ) + except Exception as exc: + logger.warning("[team] backend pull failed on connect: %s", exc) replayed = await self._materializer.replay_new_events(self._inner) if replayed: logger.info("[team] materialized %d peer events on startup", replayed) self._ready = True + async def flush_to_backend(self) -> None: + """Push the author's own JSONL to the remote when writes have happened. + + Called from the post-handler middleware so writes propagate at most + once per tool-call lifecycle (vs once per individual event, which + would hammer remote APIs). + """ + if self._backend is None or not self._dirty: + return + await self._backend.push_events( + self._writer.path, remote_name=self._writer.path.name + ) + self._dirty = False + async def _ensure_ready(self) -> None: """Lazy connect + materialize on first use.""" if not self._ready: @@ -56,6 +81,7 @@ async def ingest_payload(self, payload: dict, ctx=None) -> dict: """ await self._ensure_ready() self._writer.write("ingest.completed", payload) + self._dirty = True return await self._inner.ingest_payload(payload, ctx=ctx) async def ingest_commit( @@ -79,6 +105,7 @@ async def ingest_commit( "link_commit.completed", {"commit_hash": commit_hash, "repo_path": repo_path}, ) + self._dirty = True return await self._inner.ingest_commit( commit_hash, repo_path, @@ -131,6 +158,7 @@ async def bind_decision( "end_line": end_line, }, ) + self._dirty = True return await self._inner.bind_decision( decision_id=decision_id, file_path=file_path, @@ -158,6 +186,7 @@ async def apply_ratify(self, decision_id: str, signoff: dict) -> str: "signoff": signoff, }, ) + self._dirty = True return await self._inner.apply_ratify(decision_id, signoff) async def apply_supersede( @@ -190,6 +219,7 @@ async def apply_supersede( "session_id": session_id, }, ) + self._dirty = True return await self._inner.apply_supersede( new_id=new_id, old_id=old_id, @@ -242,6 +272,7 @@ async def apply_resolve_compliance( "evidence": evidence, }, ) + self._dirty = True async def wipe_all_rows(self, repo: str) -> None: """Wipe the DB then reset the event watermark. diff --git a/handlers/sync_middleware.py b/handlers/sync_middleware.py index 52e376d5..b9059875 100644 --- a/handlers/sync_middleware.py +++ b/handlers/sync_middleware.py @@ -221,6 +221,55 @@ async def get_session_start_banner(ctx) -> SessionStartBanner | None: ) +# ── Team-mode backend sync (#277) ─────────────────────────────────────── +# In-process TTL cache so a tight tool-call loop in one session doesn't +# hammer Drive on every invocation. 30 s is the user-visible bound on +# how stale peer events can be at the start of a tool call. +_LAST_TEAM_PULL_AT: dict[str, float] = {} +_TEAM_PULL_TTL_S = 30.0 + + +async def ensure_team_synced(ctx) -> None: + """Pull peer events from the team backend, TTL-cached per repo. + + No-op when ledger is solo (no `_backend`), backend is None, or the + last pull for this repo finished within the TTL. Swallows backend + errors at DEBUG so a transient remote failure never blocks tool dispatch. + """ + ledger = getattr(ctx, "ledger", None) + if ledger is None: + return + backend = getattr(ledger, "_backend", None) + if backend is None: + return + repo = getattr(ctx, "repo_path", "") or "." + now = time.monotonic() + last = _LAST_TEAM_PULL_AT.get(repo, 0.0) + if now - last < _TEAM_PULL_TTL_S: + return + try: + await backend.pull_events(ledger._writer.events_dir, since_token=None) + await ledger._materializer.replay_new_events(ledger._inner) + _LAST_TEAM_PULL_AT[repo] = now + except Exception as exc: + logger.debug("[sync_middleware] team pull failed: %s", exc) + + +async def flush_team_writes(ctx) -> None: + """Push the author's JSONL to the backend if any write happened. + + Called from the dispatch `finally` so a successful tool call propagates + its events to peers at most once per call. Swallows errors at DEBUG. + """ + ledger = getattr(ctx, "ledger", None) + if ledger is None or not hasattr(ledger, "flush_to_backend"): + return + try: + await ledger.flush_to_backend() + except Exception as exc: + logger.debug("[sync_middleware] team flush failed: %s", exc) + + async def ensure_ledger_synced(ctx) -> LinkCommitResponse | None: """Sync ledger to HEAD if it has moved since the last sync in this process. diff --git a/pyproject.toml b/pyproject.toml index ac07ade1..6bf0ae46 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,9 @@ dependencies = [ "surrealdb==2.0.0", "questionary>=2.0.0", "sigstore>=3.0", + # Google Drive backend for team-mode replication (#277). + "google-auth-oauthlib>=1.2", + "google-api-python-client>=2.100", ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index a48f5f19..03073d7a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,5 +18,9 @@ cocoindex>=0.3.36,<1.0.0 # v1.0 alpha breaks flow API (op, flow_def, sources) sqlite-vec>=0.1.9 sentence-transformers>=2.6.0 # cocoindex-code removed — unused, and it pins cocoindex==1.0.0a38 (alpha) which breaks the v0.3 flow API +# Google Drive backend (#277 — team-mode remote event log) +google-auth-oauthlib>=1.2 +google-api-python-client>=2.100 + pytest>=8.0.0 pytest-asyncio>=0.23.0 diff --git a/server.py b/server.py index b3539612..e10179a5 100644 --- a/server.py +++ b/server.py @@ -999,9 +999,10 @@ async def _call_tool_impl(name: str, arguments: dict) -> list[TextContent]: # can surface pending_compliance_checks in the outer tool response. _sync_result = None if name not in ("bicameral.link_commit", "link_commit", "bicameral.update", "update"): - from handlers.sync_middleware import ensure_ledger_synced + from handlers.sync_middleware import ensure_ledger_synced, ensure_team_synced _sync_result = await ensure_ledger_synced(ctx) + await ensure_team_synced(ctx) try: if name in ("bicameral.link_commit", "link_commit"): @@ -1212,6 +1213,13 @@ async def _call_tool_impl(name: str, arguments: dict) -> list[TextContent]: text=json.dumps({"error": str(exc), "action": action}, indent=2), ) ] + finally: + # #277: push the author's JSONL to the team backend at most once + # per tool-call lifecycle. No-op in solo mode or when no writes + # happened. Errors swallowed at DEBUG inside the helper. + from handlers.sync_middleware import flush_team_writes + + await flush_team_writes(ctx) async def run_smoke_test() -> dict[str, object]: diff --git a/setup_wizard.py b/setup_wizard.py index 4d98a357..075c739b 100644 --- a/setup_wizard.py +++ b/setup_wizard.py @@ -13,6 +13,7 @@ import hashlib import json +import os import shutil import subprocess import sys @@ -951,6 +952,207 @@ def _select_collaboration_mode() -> str: return result if result is not None else "team" +# ── Team-mode backend wizard (#277) ───────────────────────────────────── + + +def _prompt_text_or_exit(message: str) -> str: + """Prompt for free text; exit cleanly if the user aborts (Ctrl-C).""" + import questionary + + answer = questionary.text(message).ask() + if answer is None: + sys.exit("Setup aborted.") + return answer.strip() + + +def _prompt_yes_no(message: str, default: bool = False) -> bool: + """Yes/no confirmation with explicit default. Default-No when in doubt.""" + import questionary + + answer = questionary.confirm(message, default=default).ask() + return bool(answer) if answer is not None else default + + +def _extract_folder_id(raw: str) -> str: + """Accept either a raw folder ID or a Drive folder URL.""" + raw = raw.strip() + marker = "/folders/" + if marker in raw: + tail = raw.split(marker, 1)[1] + return tail.split("?", 1)[0].split("/", 1)[0] + return raw + + +def _resolved_signer(repo_path: Path) -> str: + """The signer string the team will see in the JSONL substrate. + + Defaults to the privacy-positive `local-part-only` policy (see + events/writer.py::_resolve_signer_email) — operator can override in + config.yaml after setup. + """ + from events.writer import _get_git_email, _resolve_signer_email + + return _resolve_signer_email(_get_git_email(str(repo_path)), mode="local-part-only") + + +# ── Colored security disclosure (#277, user request) ──────────────────── +# Stays explicit about what Bicameral can and cannot see BEFORE we open the +# OAuth consent browser. Color rendered only when stdout is a TTY; falls +# back to plain text in pipes / CI / non-interactive contexts. + +_ANSI_BOLD = "\033[1m" +_ANSI_GREEN = "\033[32m" +_ANSI_YELLOW = "\033[33m" +_ANSI_CYAN = "\033[36m" +_ANSI_RESET = "\033[0m" + + +def _color(s: str, code: str) -> str: + """Wrap with ANSI color code only when stdout is a TTY.""" + if not sys.stdout.isatty(): + return s + return f"{code}{s}{_ANSI_RESET}" + + +def _print_drive_security_disclosure() -> None: + """Show the security model BEFORE the browser redirects to Google. + + Operator gets one last chance to read what Bicameral can and cannot see + before granting Drive access. Required by team-mode setup UX (#277). + """ + border = _color("─" * 72, _ANSI_CYAN) + bold = _ANSI_BOLD if sys.stdout.isatty() else "" + reset = _ANSI_RESET if sys.stdout.isatty() else "" + + print() + print(border) + print(_color(" About to open Google sign-in. Read this first:", _ANSI_BOLD)) + print(border) + print() + print(f" {bold}What flows where{reset}") + print(f" {_color('•', _ANSI_GREEN)} Decision data (transcripts, payloads) flows your-CLI ↔ Google directly.") + print(" Bicameral the company does NOT receive copies. No Bicameral server in the loop.") + print(f" {_color('•', _ANSI_GREEN)} Your OAuth token stays on your machine ({_color('~/.bicameral/google-drive-token.json', _ANSI_CYAN)}, mode 0600).") + print() + print(f" {bold}What the `drive.file` scope means for the rest of your Drive{reset}") + print(f" {_color('•', _ANSI_GREEN)} The Bicameral CLI on your machine can only touch files it creates") + print(" in the team folder. Your other Drive content (other folders, Google Docs,") + print(" shared files) is invisible to the CLI — Google enforces this.") + print() + print(f" {bold}What Bicameral the company CAN see (as the OAuth app publisher){reset}") + print(f" {_color('•', _ANSI_YELLOW)} Aggregate API request counts. {_color('Not contents.', _ANSI_BOLD)}") + print(f" {_color('•', _ANSI_YELLOW)} OAuth consent records: which Google accounts authenticated, when.") + print() + print(f" {bold}The trust dependency you're accepting{reset}") + print(" Same as any OAuth tool (gh, gcloud, Notion, Slack desktop): you trust") + print(" that the open-source CLI behaves as advertised. Source: github.com/BicameralAI/bicameral-mcp") + print() + print(" Full security model: docs/team-mode-setup.md") + print(border) + print() + + +def _select_team_backend(repo_path: Path) -> dict: + """Top-level Create vs Join vs LocalFolder dispatch. Returns team config dict.""" + import questionary + + if not _is_interactive(): + return {} + + intent = questionary.select( + "How do you want to set up the shared ledger?", + choices=[ + questionary.Choice( + "Create a new shared ledger (you become the founding member)", + value="create", + ), + questionary.Choice( + "Join an existing shared ledger (paste a folder ID from a teammate)", + value="join", + ), + questionary.Choice( + "Use a shared filesystem instead (NFS, Dropbox, syncthing) — advanced", + value="local_folder", + ), + ], + default="create", + ).ask() + if intent is None or intent == "create": + return _create_shared_ledger_drive(repo_path) + if intent == "join": + return _join_shared_ledger_drive(repo_path) + return _select_local_folder_backend() + + +def _create_shared_ledger_drive(repo_path: Path) -> dict: + """Create branch — operator becomes the founding member.""" + from events.backends.google_drive import GoogleDriveAdapter + + _print_drive_security_disclosure() + if not _prompt_yes_no("Open browser to grant Drive access?", default=True): + sys.exit("Aborted. Re-run setup when you're ready to grant access.") + adapter = GoogleDriveAdapter(folder_id=None, author=_resolved_signer(repo_path)) + adapter._credentials() # runs OAuth, caches token + repo_name = Path(repo_path).resolve().name + folder_id = adapter.create_folder(name=f"bicameral-{repo_name}-ledger") + print() + print(f" Created shared ledger folder: bicameral-{repo_name}-ledger") + print(f" Folder ID: {folder_id}") + print(f" URL: https://drive.google.com/drive/folders/{folder_id}") + print() + print(" Send this to your teammates so they can Join:") + print( + f' "Share this folder with my teammate as Editor, then run `bicameral setup` ' + f"and paste this folder ID: {folder_id}\"" + ) + print() + return {"backend": "google_drive", "folder_id": folder_id, "role": "founding_member"} + + +def _join_shared_ledger_drive(repo_path: Path) -> dict: + """Join branch — verify access, confirm identity, then persist.""" + raw = _prompt_text_or_exit( + "Paste the shared ledger folder ID (or full Drive URL) from your teammate:" + ) + folder_id = _extract_folder_id(raw) + + from events.backends.google_drive import GoogleDriveAdapter + + _print_drive_security_disclosure() + if not _prompt_yes_no("Open browser to grant Drive access?", default=True): + sys.exit("Aborted. Re-run setup when you're ready to grant access.") + + adapter = GoogleDriveAdapter(folder_id=folder_id, author=_resolved_signer(repo_path)) + adapter._credentials() + try: + adapter.verify_access() + except Exception as exc: + sys.exit(f"\n Cannot join shared ledger: {exc}") + + signer = _resolved_signer(repo_path) + confirmed = _prompt_yes_no( + f"You'll appear in the team ledger as `{signer}`. Continue?", + default=False, + ) + if not confirmed: + sys.exit( + "Aborted. Adjust signer_email_fallback in your config.yaml " + "(redact | local-part-only | full) and re-run setup." + ) + return {"backend": "google_drive", "folder_id": folder_id, "role": "member"} + + +def _select_local_folder_backend() -> dict: + """Advanced branch — shared filesystem (NFS, Dropbox, syncthing).""" + raw = _prompt_text_or_exit( + "Path to the shared folder (must exist on every teammate's machine):" + ) + p = Path(raw).expanduser().resolve() + if not p.exists() or not os.access(p, os.W_OK): + sys.exit(f"\n Path not writable: {p}") + return {"backend": "local_folder", "remote_root": str(p), "role": "member"} + + def _select_guided_mode() -> bool: """Prompt user for guided-mode intensity.""" import questionary @@ -1024,27 +1226,39 @@ def _write_collaboration_config( mode: str, guided: bool = False, telemetry: bool = False, + team_backend: dict | None = None, ) -> None: """Write .bicameral/config.yaml with collaboration mode, guided-mode, telemetry, - and signer-email fallback flags. + signer-email fallback, and (optionally) the team-backend block. `signer_email_fallback` (#200 Phase 2) defaults to `local-part-only` — privacy-positive: preserves attribution prefix on session-originated ingests without leaking the full git user.email to the ledger / team- mode JSONL substrate. Modes: `redact` (strongest, no attribution), `local-part-only` (default), `full` (legacy verbatim email). + + `team_backend` (#277): when present, persists `team:` block with + `backend`, `role`, and either `folder_id` (Drive) or `remote_root` + (LocalFolder). """ config_path = data_path / ".bicameral" / "config.yaml" config_path.parent.mkdir(parents=True, exist_ok=True) - config_path.write_text( + base = ( "# Bicameral configuration\n" f"mode: {mode}\n" f"guided: {'true' if guided else 'false'}\n" f"telemetry: {'true' if telemetry else 'false'}\n" "signer_email_fallback: local-part-only\n" - "render_source_attribution: redacted\n", # #209: privacy-positive default (was "full") - encoding="utf-8", + "render_source_attribution: redacted\n" # #209: privacy-positive default ) + if team_backend: + team_lines = ["team:"] + for key in ("backend", "folder_id", "remote_root", "role"): + if key in team_backend: + value = team_backend[key] + team_lines.append(f" {key}: {value}") + base += "\n".join(team_lines) + "\n" + config_path.write_text(base, encoding="utf-8") print(f" Collaboration: {mode} mode") print(f" Guided mode: {'on — blocking hints' if guided else 'off — advisory hints'}") print(f" Telemetry: {'on — anonymous usage stats' if telemetry else 'off'}") @@ -1175,7 +1389,19 @@ def run_setup( collab_mode = _select_collaboration_mode() guided = _select_guided_mode() telemetry = _select_telemetry() - _write_collaboration_config(data_path, collab_mode, guided=guided, telemetry=telemetry) + + team_backend: dict | None = None + if collab_mode == "team": + # #277: Create vs Join vs LocalFolder dispatch for the shared ledger. + team_backend = _select_team_backend(repo_path) + + _write_collaboration_config( + data_path, + collab_mode, + guided=guided, + telemetry=telemetry, + team_backend=team_backend, + ) _ensure_gitignore(data_path, mode=collab_mode, repo_path=repo_path) if collab_mode == "team": diff --git a/tests/test_backends_google_drive_unit.py b/tests/test_backends_google_drive_unit.py new file mode 100644 index 00000000..f4688b4c --- /dev/null +++ b/tests/test_backends_google_drive_unit.py @@ -0,0 +1,257 @@ +"""Phase 2 unit tests for events.backends.google_drive.GoogleDriveAdapter (#277). + +All Drive API calls are stubbed via unittest.mock; no network. The adapter's +contract under test: + + * push_events: idempotent on md5; uploads or updates as appropriate + * pull_events: skips own file; only downloads when remote md5 differs; + returns max modifiedTime as the next since-token + * lock: creates/deletes a sentinel file, with cleanup on exception + * verify_access: raises FolderNotFoundError on 404, ReadOnlyAccessError + when capabilities.canEdit is False + * create_folder: returns the new folder ID + * _credentials: raises MissingOAuthClientError when neither env nor + ~/.bicameral/google-drive-client.json is provisioned +""" + +from __future__ import annotations + +import hashlib +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + + +def _md5(b: bytes) -> str: + return hashlib.md5(b).hexdigest() + + +@pytest.fixture +def stub_drive(): + """Patch googleapiclient.discovery.build → MagicMock. + + Tests interact with the returned mock to set list/get/create/update + behavior per scenario. + """ + pytest.importorskip("googleapiclient") + pytest.importorskip("google_auth_oauthlib") + with patch("events.backends.google_drive._build_drive_service") as build: + svc = MagicMock() + build.return_value = svc + yield svc + + +@pytest.fixture +def stub_credentials(): + """Bypass real OAuth — return a sentinel object.""" + with patch( + "events.backends.google_drive.GoogleDriveAdapter._credentials", + return_value=MagicMock(), + ) as m: + yield m + + +def _files_list(svc, returned_files): + svc.files.return_value.list.return_value.execute.return_value = { + "files": returned_files + } + + +@pytest.mark.asyncio +async def test_push_skips_when_md5_matches(tmp_path: Path, stub_drive, stub_credentials): + from events.backends.google_drive import GoogleDriveAdapter + + body = b'{"e":"alice"}\n' + local = tmp_path / "alice@x.com.jsonl" + local.write_bytes(body) + _files_list(stub_drive, [{"id": "rid", "md5Checksum": _md5(body)}]) + + adapter = GoogleDriveAdapter(folder_id="folder-1", author="alice@x.com") + await adapter.push_events(local, "alice@x.com.jsonl") + + stub_drive.files.return_value.update.assert_not_called() + stub_drive.files.return_value.create.assert_not_called() + + +@pytest.mark.asyncio +async def test_push_updates_when_md5_differs(tmp_path: Path, stub_drive, stub_credentials): + from events.backends.google_drive import GoogleDriveAdapter + + body = b'{"e":"alice-v2"}\n' + local = tmp_path / "alice@x.com.jsonl" + local.write_bytes(body) + _files_list(stub_drive, [{"id": "rid", "md5Checksum": "stale-md5"}]) + + adapter = GoogleDriveAdapter(folder_id="folder-1", author="alice@x.com") + await adapter.push_events(local, "alice@x.com.jsonl") + + stub_drive.files.return_value.update.assert_called_once() + args, kwargs = stub_drive.files.return_value.update.call_args + assert kwargs["fileId"] == "rid" + + +@pytest.mark.asyncio +async def test_push_creates_when_remote_missing(tmp_path: Path, stub_drive, stub_credentials): + from events.backends.google_drive import GoogleDriveAdapter + + body = b"first event\n" + local = tmp_path / "alice@x.com.jsonl" + local.write_bytes(body) + _files_list(stub_drive, []) + + adapter = GoogleDriveAdapter(folder_id="folder-1", author="alice@x.com") + await adapter.push_events(local, "alice@x.com.jsonl") + + stub_drive.files.return_value.create.assert_called_once() + + +@pytest.mark.asyncio +async def test_pull_writes_only_changed_peer_files( + tmp_path: Path, stub_drive, stub_credentials +): + from events.backends.google_drive import GoogleDriveAdapter + + local_dir = tmp_path / "local" + local_dir.mkdir() + bob_existing = b"bob-old\n" + (local_dir / "bob@x.com.jsonl").write_bytes(bob_existing) + + _files_list( + stub_drive, + [ + {"id": "alice-id", "name": "alice@x.com.jsonl", "md5Checksum": "x", "modifiedTime": "2026-05-08T10:00:00Z"}, + {"id": "bob-id", "name": "bob@x.com.jsonl", "md5Checksum": _md5(bob_existing), "modifiedTime": "2026-05-08T11:00:00Z"}, + {"id": "carol-id", "name": "carol@x.com.jsonl", "md5Checksum": "y", "modifiedTime": "2026-05-08T12:00:00Z"}, + ], + ) + # Stub the get_media chain: returns a request whose .execute() returns bytes. + def _media_for(fileId): + media = MagicMock() + media.execute.return_value = b"new-content-for-" + fileId.encode() + return media + + stub_drive.files.return_value.get_media.side_effect = _media_for + + adapter = GoogleDriveAdapter(folder_id="folder-1", author="alice@x.com") + token = await adapter.pull_events(local_dir, since_token=None) + + # Carol is the only peer that should have been downloaded. + # Alice is owned (skipped); Bob's md5 matches local (skipped). + downloaded_ids = [c.kwargs.get("fileId") or c.args[0] for c in stub_drive.files.return_value.get_media.call_args_list] + assert downloaded_ids == ["carol-id"] + assert (local_dir / "carol@x.com.jsonl").read_bytes() == b"new-content-for-carol-id" + # Alice's own file must not be created locally + assert not (local_dir / "alice@x.com.jsonl").exists() + # since-token = max modifiedTime across all listed files + assert token == "2026-05-08T12:00:00Z" + + +@pytest.mark.asyncio +async def test_lock_creates_then_deletes_sentinel(stub_drive, stub_credentials): + from events.backends.google_drive import GoogleDriveAdapter + + create = stub_drive.files.return_value.create + delete = stub_drive.files.return_value.delete + create.return_value.execute.return_value = {"id": "lock-id"} + + adapter = GoogleDriveAdapter(folder_id="folder-1", author="alice@x.com") + async with adapter.lock("alice@x.com.jsonl"): + create.assert_called_once() + delete.assert_called_once() + assert delete.call_args.kwargs.get("fileId") == "lock-id" + + +@pytest.mark.asyncio +async def test_lock_releases_on_exception(stub_drive, stub_credentials): + from events.backends.google_drive import GoogleDriveAdapter + + create = stub_drive.files.return_value.create + delete = stub_drive.files.return_value.delete + create.return_value.execute.return_value = {"id": "lock-id"} + + adapter = GoogleDriveAdapter(folder_id="folder-1", author="alice@x.com") + with pytest.raises(RuntimeError, match="boom"): + async with adapter.lock("alice@x.com.jsonl"): + raise RuntimeError("boom") + delete.assert_called_once() + + +@pytest.mark.asyncio +async def test_verify_access_raises_on_404(stub_drive, stub_credentials): + pytest.importorskip("googleapiclient") + from googleapiclient.errors import HttpError + + from events.backends.google_drive import ( + FolderNotFoundError, + GoogleDriveAdapter, + ) + + fake_resp = MagicMock(status=404, reason="Not Found") + stub_drive.files.return_value.get.return_value.execute.side_effect = HttpError( + fake_resp, b"Not Found" + ) + + adapter = GoogleDriveAdapter(folder_id="missing-folder", author="alice@x.com") + with pytest.raises(FolderNotFoundError, match="missing-folder"): + adapter.verify_access() + + +@pytest.mark.asyncio +async def test_verify_access_raises_on_read_only(stub_drive, stub_credentials): + from events.backends.google_drive import ( + GoogleDriveAdapter, + ReadOnlyAccessError, + ) + + stub_drive.files.return_value.get.return_value.execute.return_value = { + "id": "f-1", + "capabilities": {"canEdit": False}, + } + adapter = GoogleDriveAdapter(folder_id="f-1", author="alice@x.com") + with pytest.raises(ReadOnlyAccessError): + adapter.verify_access() + + +@pytest.mark.asyncio +async def test_verify_access_passes_when_can_edit(stub_drive, stub_credentials): + from events.backends.google_drive import GoogleDriveAdapter + + stub_drive.files.return_value.get.return_value.execute.return_value = { + "id": "f-1", + "capabilities": {"canEdit": True}, + } + adapter = GoogleDriveAdapter(folder_id="f-1", author="alice@x.com") + adapter.verify_access() # no exception + + +@pytest.mark.asyncio +async def test_create_folder_returns_id(stub_drive, stub_credentials): + from events.backends.google_drive import GoogleDriveAdapter + + stub_drive.files.return_value.create.return_value.execute.return_value = {"id": "new123"} + adapter = GoogleDriveAdapter(folder_id=None, author="alice@x.com") + result = adapter.create_folder("bicameral-foo-ledger") + + assert result == "new123" + body = stub_drive.files.return_value.create.call_args.kwargs["body"] + assert body["name"] == "bicameral-foo-ledger" + assert body["mimeType"] == "application/vnd.google-apps.folder" + + +def test_bundled_client_config_raises_when_placeholders_present(monkeypatch): + """Placeholder client_id/secret in source must surface a clear error. + + Once Jin replaces the constants with the published OAuth client, this + test is rewritten to assert the config dict has the right shape instead. + """ + pytest.importorskip("googleapiclient") + pytest.importorskip("google_auth_oauthlib") + from events.backends import google_drive + from events.backends.google_drive import OAuthClientNotProvisionedError + + if not google_drive._BUNDLED_CLIENT_ID.startswith(google_drive._PLACEHOLDER_PREFIX): + pytest.skip("Bundled OAuth client published; placeholder detection no longer relevant.") + + with pytest.raises(OAuthClientNotProvisionedError, match="isn.t published"): + google_drive._bundled_client_config() diff --git a/tests/test_backends_local_folder.py b/tests/test_backends_local_folder.py new file mode 100644 index 00000000..aa6c8a74 --- /dev/null +++ b/tests/test_backends_local_folder.py @@ -0,0 +1,115 @@ +"""Phase 1 unit tests for events.backends.local_folder.LocalFolderAdapter (#277).""" + +from __future__ import annotations + +import asyncio +from pathlib import Path + +import pytest + +from events.backends.local_folder import LocalFolderAdapter + + +def _populate(p: Path, body: bytes) -> None: + p.parent.mkdir(parents=True, exist_ok=True) + p.write_bytes(body) + + +@pytest.mark.asyncio +async def test_push_uploads_when_remote_missing(tmp_path: Path) -> None: + local = tmp_path / "local" / "alice@x.com.jsonl" + remote = tmp_path / "remote" + remote.mkdir() + _populate(local, b'{"event_type":"ingest.completed"}\n') + + adapter = LocalFolderAdapter(remote_root=remote, author="alice@x.com") + await adapter.push_events(local, "alice@x.com.jsonl") + + assert (remote / "alice@x.com.jsonl").read_bytes() == local.read_bytes() + + +@pytest.mark.asyncio +async def test_push_skips_when_remote_hash_matches(tmp_path: Path) -> None: + local = tmp_path / "alice@x.com.jsonl" + remote = tmp_path / "remote" + remote.mkdir() + _populate(local, b'{"event_type":"ingest.completed"}\n') + + adapter = LocalFolderAdapter(remote_root=remote, author="alice@x.com") + await adapter.push_events(local, "alice@x.com.jsonl") + first_mtime = (remote / "alice@x.com.jsonl").stat().st_mtime_ns + + # Bump local mtime but keep contents identical → push must be a no-op. + import os + os.utime(local, ns=(first_mtime + 1_000_000_000, first_mtime + 1_000_000_000)) + await adapter.push_events(local, "alice@x.com.jsonl") + + assert (remote / "alice@x.com.jsonl").stat().st_mtime_ns == first_mtime + + +@pytest.mark.asyncio +async def test_pull_copies_peer_files_only(tmp_path: Path) -> None: + remote = tmp_path / "remote" + remote.mkdir() + local_dir = tmp_path / "local" + local_dir.mkdir() + _populate(remote / "alice@x.com.jsonl", b'{"e":"alice-remote"}\n') + _populate(remote / "bob@x.com.jsonl", b'{"e":"bob-remote"}\n') + _populate(local_dir / "alice@x.com.jsonl", b'{"e":"alice-local"}\n') + + adapter = LocalFolderAdapter(remote_root=remote, author="alice@x.com") + await adapter.pull_events(local_dir, since_token=None) + + assert (local_dir / "alice@x.com.jsonl").read_bytes() == b'{"e":"alice-local"}\n' + assert (local_dir / "bob@x.com.jsonl").read_bytes() == b'{"e":"bob-remote"}\n' + + +@pytest.mark.asyncio +async def test_pull_skips_unchanged(tmp_path: Path) -> None: + remote = tmp_path / "remote" + remote.mkdir() + local_dir = tmp_path / "local" + local_dir.mkdir() + _populate(remote / "bob@x.com.jsonl", b'{"e":"bob-1"}\n') + + adapter = LocalFolderAdapter(remote_root=remote, author="alice@x.com") + await adapter.pull_events(local_dir, since_token=None) + first_mtime = (local_dir / "bob@x.com.jsonl").stat().st_mtime_ns + + await adapter.pull_events(local_dir, since_token=None) + + assert (local_dir / "bob@x.com.jsonl").stat().st_mtime_ns == first_mtime + + +@pytest.mark.asyncio +async def test_list_peers_yields_email_stems(tmp_path: Path) -> None: + remote = tmp_path / "remote" + remote.mkdir() + _populate(remote / "alice@x.com.jsonl", b"") + _populate(remote / "bob@x.com.jsonl", b"") + _populate(remote / "carol@x.com.jsonl", b"") + _populate(remote / "noise.txt", b"ignore") + + adapter = LocalFolderAdapter(remote_root=remote, author="alice@x.com") + peers = sorted([name async for name in adapter.list_peers()]) + + assert peers == ["alice@x.com", "bob@x.com", "carol@x.com"] + + +@pytest.mark.asyncio +async def test_lock_serializes_concurrent_acquirers(tmp_path: Path) -> None: + """Second acquirer waits until the first releases.""" + remote = tmp_path / "remote" + remote.mkdir() + adapter = LocalFolderAdapter(remote_root=remote, author="alice@x.com") + order: list[str] = [] + + async def hold(name: str, hold_ms: int) -> None: + async with adapter.lock("alice@x.com.jsonl"): + order.append(f"{name}-acquired") + await asyncio.sleep(hold_ms / 1000) + order.append(f"{name}-released") + + await asyncio.gather(hold("first", 100), hold("second", 0)) + # First's release MUST appear before second's acquire (strict serialization). + assert order.index("first-released") < order.index("second-acquired") diff --git a/tests/test_setup_wizard_team_backend.py b/tests/test_setup_wizard_team_backend.py new file mode 100644 index 00000000..7f633ac1 --- /dev/null +++ b/tests/test_setup_wizard_team_backend.py @@ -0,0 +1,237 @@ +"""Phase 3 tests: setup wizard Create/Join/LocalFolder branches (#277). + +Wizard helpers under test live in setup_wizard.py: + * _select_team_backend(repo_path) → dict + * _create_shared_ledger_drive(repo_path) → dict + * _join_shared_ledger_drive(repo_path) → dict + * _select_local_folder_backend() → dict + * _extract_folder_id(raw) → str + * _write_collaboration_config(...) — extended to persist team backend dict + +All Drive interactions are stubbed via unittest.mock. +""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + + +def _read_yaml(path: Path) -> dict: + import yaml + + return yaml.safe_load(path.read_text(encoding="utf-8")) or {} + + +# ── _extract_folder_id ─────────────────────────────────────────────────── + + +def test_extract_folder_id_accepts_raw_id(): + from setup_wizard import _extract_folder_id + + assert _extract_folder_id("1AbCdEfGhIjKl_mNoPqRsTuVwXyZ-abcd") == "1AbCdEfGhIjKl_mNoPqRsTuVwXyZ-abcd" + + +def test_extract_folder_id_accepts_full_url(): + from setup_wizard import _extract_folder_id + + url = "https://drive.google.com/drive/folders/1AbCdEfGhIjKl_mNoPqRsTuVwXyZ-abcd?usp=sharing" + assert _extract_folder_id(url) == "1AbCdEfGhIjKl_mNoPqRsTuVwXyZ-abcd" + + +def test_extract_folder_id_strips_whitespace(): + from setup_wizard import _extract_folder_id + + assert _extract_folder_id(" abc123 \n") == "abc123" + + +# ── Create branch ─────────────────────────────────────────────────────── + + +def test_create_branch_persists_founding_member_role(tmp_path: Path, monkeypatch): + """End-to-end: Create flow writes founding_member role + folder_id to config.""" + from setup_wizard import _create_shared_ledger_drive, _write_collaboration_config + + monkeypatch.setattr("setup_wizard._prompt_yes_no", lambda *a, **kw: True) + + fake_adapter = MagicMock() + fake_adapter.create_folder.return_value = "abc123" + with patch( + "events.backends.google_drive.GoogleDriveAdapter", return_value=fake_adapter + ): + team_cfg = _create_shared_ledger_drive(repo_path=tmp_path) + + assert team_cfg == { + "backend": "google_drive", + "folder_id": "abc123", + "role": "founding_member", + } + fake_adapter._credentials.assert_called_once() + fake_adapter.create_folder.assert_called_once() + + _write_collaboration_config( + data_path=tmp_path, + mode="team", + guided=False, + telemetry=False, + team_backend=team_cfg, + ) + cfg = _read_yaml(tmp_path / ".bicameral" / "config.yaml") + assert cfg["mode"] == "team" + assert cfg["team"] == { + "backend": "google_drive", + "folder_id": "abc123", + "role": "founding_member", + } + + +# ── Join branch ───────────────────────────────────────────────────────── + + +def test_join_branch_extracts_folder_id_from_url(tmp_path: Path, monkeypatch): + from setup_wizard import _join_shared_ledger_drive, _write_collaboration_config + + monkeypatch.setattr( + "setup_wizard._prompt_text_or_exit", + lambda *a, **kw: "https://drive.google.com/drive/folders/xyz789?usp=sharing", + ) + monkeypatch.setattr("setup_wizard._prompt_yes_no", lambda *a, **kw: True) + + fake_adapter = MagicMock() + with patch( + "events.backends.google_drive.GoogleDriveAdapter", return_value=fake_adapter + ): + team_cfg = _join_shared_ledger_drive(repo_path=tmp_path) + + assert team_cfg["folder_id"] == "xyz789" + assert team_cfg["role"] == "member" + assert team_cfg["backend"] == "google_drive" + fake_adapter.verify_access.assert_called_once() + + _write_collaboration_config( + data_path=tmp_path, + mode="team", + guided=False, + telemetry=False, + team_backend=team_cfg, + ) + cfg = _read_yaml(tmp_path / ".bicameral" / "config.yaml") + assert cfg["team"]["folder_id"] == "xyz789" + + +def test_join_branch_verifies_access_before_persist(tmp_path: Path, monkeypatch): + """If verify_access raises, the wizard must SystemExit and write nothing.""" + from events.backends.google_drive import FolderNotFoundError + from setup_wizard import _join_shared_ledger_drive + + monkeypatch.setattr("setup_wizard._prompt_text_or_exit", lambda *a, **kw: "missing-id") + monkeypatch.setattr("setup_wizard._prompt_yes_no", lambda *a, **kw: True) + + fake_adapter = MagicMock() + fake_adapter.verify_access.side_effect = FolderNotFoundError("missing-id not found") + + with patch( + "events.backends.google_drive.GoogleDriveAdapter", return_value=fake_adapter + ): + with pytest.raises(SystemExit): + _join_shared_ledger_drive(repo_path=tmp_path) + + # No config.yaml should exist after a failed Join. + assert not (tmp_path / ".bicameral" / "config.yaml").exists() + + +def test_join_branch_aborts_on_identity_decline(tmp_path: Path, monkeypatch): + """If the operator says No to the identity confirmation, abort cleanly.""" + from setup_wizard import _join_shared_ledger_drive + + monkeypatch.setattr("setup_wizard._prompt_text_or_exit", lambda *a, **kw: "good-id") + monkeypatch.setattr("setup_wizard._prompt_yes_no", lambda *a, **kw: False) + + fake_adapter = MagicMock() + with patch( + "events.backends.google_drive.GoogleDriveAdapter", return_value=fake_adapter + ): + with pytest.raises(SystemExit): + _join_shared_ledger_drive(repo_path=tmp_path) + + assert not (tmp_path / ".bicameral" / "config.yaml").exists() + + +def test_create_aborts_when_security_disclosure_declined(tmp_path: Path, monkeypatch): + """Operator declining the post-disclosure consent must abort cleanly.""" + from setup_wizard import _create_shared_ledger_drive + + monkeypatch.setattr("setup_wizard._prompt_yes_no", lambda *a, **kw: False) + + fake_adapter = MagicMock() + with patch( + "events.backends.google_drive.GoogleDriveAdapter", return_value=fake_adapter + ): + with pytest.raises(SystemExit, match="Aborted"): + _create_shared_ledger_drive(repo_path=tmp_path) + fake_adapter._credentials.assert_not_called() + fake_adapter.create_folder.assert_not_called() + + +def test_security_disclosure_mentions_what_bicameral_can_see(capsys, monkeypatch): + """The disclosure must surface the OAuth-app-owner visibility honestly, + distinguish CLI-on-your-machine from Bicameral-the-company, and name + the trust dependency explicitly.""" + from setup_wizard import _print_drive_security_disclosure + + _print_drive_security_disclosure() + out = capsys.readouterr().out + # File-content claim: company does NOT receive copies. + assert "does NOT receive copies" in out + # Scope claim: refers to CLI-on-your-machine, not the company. + assert "Bicameral CLI on your machine" in out + # Visibility surface: aggregate API counts + consent records, not contents. + assert "Aggregate API request counts" in out + # Trust dependency named explicitly. + assert "trust dependency" in out.lower() + assert "drive.file" in out + assert "google-drive-token.json" in out + + +# ── LocalFolder branch ────────────────────────────────────────────────── + + +def test_local_folder_branch_persists_remote_root(tmp_path: Path, monkeypatch): + from setup_wizard import _select_local_folder_backend, _write_collaboration_config + + shared = tmp_path / "shared" + shared.mkdir() + monkeypatch.setattr("setup_wizard._prompt_text_or_exit", lambda *a, **kw: str(shared)) + + team_cfg = _select_local_folder_backend() + assert team_cfg == { + "backend": "local_folder", + "remote_root": str(shared), + "role": "member", + } + + _write_collaboration_config( + data_path=tmp_path, + mode="team", + guided=False, + telemetry=False, + team_backend=team_cfg, + ) + cfg = _read_yaml(tmp_path / ".bicameral" / "config.yaml") + assert cfg["team"] == { + "backend": "local_folder", + "remote_root": str(shared), + "role": "member", + } + + +def test_local_folder_branch_rejects_unwritable_path(tmp_path: Path, monkeypatch): + from setup_wizard import _select_local_folder_backend + + monkeypatch.setattr("setup_wizard._prompt_text_or_exit", lambda *a, **kw: "/") + with pytest.raises(SystemExit, match="not writable"): + _select_local_folder_backend() + + diff --git a/tests/test_sync_middleware_team.py b/tests/test_sync_middleware_team.py new file mode 100644 index 00000000..b012ae6f --- /dev/null +++ b/tests/test_sync_middleware_team.py @@ -0,0 +1,121 @@ +"""Phase 1 tests: ensure_team_synced + flush_team_writes middleware (#277).""" + +from __future__ import annotations + +from contextlib import asynccontextmanager +from pathlib import Path +from types import SimpleNamespace + +import pytest + +import handlers.sync_middleware as middleware + + +class StubBackend: + def __init__(self) -> None: + self.pull_calls = 0 + + async def push_events(self, local_path: Path, remote_name: str) -> None: + pass + + async def pull_events(self, local_dir: Path, since_token): + self.pull_calls += 1 + return "" + + @asynccontextmanager + async def lock(self, remote_name: str): + yield + + async def list_peers(self): + if False: + yield # pragma: no cover + + +class StubInner: + pass + + +class StubMaterializer: + def __init__(self) -> None: + self.replay_calls = 0 + + async def replay_new_events(self, inner) -> int: + self.replay_calls += 1 + return 0 + + +class StubLedger: + def __init__(self, backend, materializer) -> None: + self._backend = backend + self._inner = StubInner() + self._writer = SimpleNamespace(events_dir=Path("/tmp/never-touched"), path=Path("/tmp/x.jsonl")) + self._materializer = materializer + self.flush_count = 0 + self._raise_on_flush = False + + async def flush_to_backend(self) -> None: + if self._raise_on_flush: + raise RuntimeError("simulated network error") + self.flush_count += 1 + + +def _ctx(ledger, repo_path: str = "/tmp/repo") -> SimpleNamespace: + return SimpleNamespace(ledger=ledger, repo_path=repo_path) + + +@pytest.fixture(autouse=True) +def _clear_team_pull_cache(): + middleware._LAST_TEAM_PULL_AT.clear() + yield + middleware._LAST_TEAM_PULL_AT.clear() + + +@pytest.mark.asyncio +async def test_ensure_team_synced_ttl_cache(monkeypatch) -> None: + backend = StubBackend() + mat = StubMaterializer() + ledger = StubLedger(backend, mat) + ctx = _ctx(ledger, repo_path="/repo-A") + + fake_now = [1000.0] + monkeypatch.setattr(middleware.time, "monotonic", lambda: fake_now[0]) + + await middleware.ensure_team_synced(ctx) + assert backend.pull_calls == 1 + + fake_now[0] = 1015.0 # within 30 s TTL → no-op + await middleware.ensure_team_synced(ctx) + assert backend.pull_calls == 1 + + fake_now[0] = 1100.0 # past TTL → pull again + await middleware.ensure_team_synced(ctx) + assert backend.pull_calls == 2 + + +@pytest.mark.asyncio +async def test_ensure_team_synced_no_backend_is_noop() -> None: + ledger = StubLedger(backend=None, materializer=StubMaterializer()) + ctx = _ctx(ledger) + await middleware.ensure_team_synced(ctx) # must not raise + + +@pytest.mark.asyncio +async def test_ensure_team_synced_no_ledger_is_noop() -> None: + ctx = SimpleNamespace(repo_path="/x") # no ledger attr + await middleware.ensure_team_synced(ctx) + + +@pytest.mark.asyncio +async def test_flush_team_writes_swallows_backend_errors() -> None: + ledger = StubLedger(StubBackend(), StubMaterializer()) + ledger._raise_on_flush = True + ctx = _ctx(ledger) + await middleware.flush_team_writes(ctx) # must not raise + + +@pytest.mark.asyncio +async def test_flush_team_writes_calls_through_when_present() -> None: + ledger = StubLedger(StubBackend(), StubMaterializer()) + ctx = _ctx(ledger) + await middleware.flush_team_writes(ctx) + assert ledger.flush_count == 1 diff --git a/tests/test_team_adapter_with_backend.py b/tests/test_team_adapter_with_backend.py new file mode 100644 index 00000000..ab1f1011 --- /dev/null +++ b/tests/test_team_adapter_with_backend.py @@ -0,0 +1,138 @@ +"""Phase 1 tests: TeamWriteAdapter ↔ BackendAdapter wiring (#277).""" + +from __future__ import annotations + +from contextlib import asynccontextmanager +from pathlib import Path + +import pytest + +from events.materializer import EventMaterializer +from events.team_adapter import TeamWriteAdapter +from events.writer import EventFileWriter +from ledger.adapter import SurrealDBLedgerAdapter + + +class FakeBackend: + """In-memory backend stub. Records calls so tests can assert on them.""" + + def __init__(self, prepopulate: dict[str, bytes] | None = None) -> None: + self._files = dict(prepopulate or {}) + self.push_calls: list[tuple[Path, str]] = [] + self.pull_calls = 0 + + async def push_events(self, local_path: Path, remote_name: str) -> None: + self.push_calls.append((local_path, remote_name)) + self._files[remote_name] = local_path.read_bytes() + + async def pull_events(self, local_dir: Path, since_token): + self.pull_calls += 1 + local_dir.mkdir(parents=True, exist_ok=True) + for name, body in self._files.items(): + (local_dir / name).write_bytes(body) + return "" + + @asynccontextmanager + async def lock(self, remote_name: str): + yield + + async def list_peers(self): + for name in self._files: + if name.endswith(".jsonl"): + yield name[: -len(".jsonl")] + + +def _payload(intent: str, source_ref: str) -> dict: + return { + "query": intent, + "repo": "test-repo", + "commit_hash": "deadbeef00000000000000000000000000000000", + "analyzed_at": "2026-04-29T12:00:00Z", + "mappings": [ + { + "span": { + "span_id": f"span-{source_ref}", + "source_type": "transcript", + "text": intent, + "speaker": "Tester", + "source_ref": source_ref, + }, + "intent": intent, + "symbols": [], + "code_regions": [], + "dependency_edges": [], + } + ], + } + + +def _build(events_dir: Path, local_dir: Path, author: str, backend) -> tuple[TeamWriteAdapter, SurrealDBLedgerAdapter]: + inner = SurrealDBLedgerAdapter(url="memory://") + writer = EventFileWriter(events_dir, author) + materializer = EventMaterializer(events_dir, local_dir) + return TeamWriteAdapter(inner, writer, materializer, backend=backend), inner + + +@pytest.mark.asyncio +async def test_connect_pulls_then_replays(tmp_path: Path) -> None: + """Backend pull populates events_dir; then materializer applies them.""" + # Pre-stage a peer event file in the BACKEND (not in events_dir). + # connect() should pull it down then replay → inner adapter sees the ingest. + peer_payload = _payload("peer-intent", "peer-1") + import json + + peer_event = json.dumps( + { + "schema_version": 2, + "event_type": "ingest.completed", + "author": "peer@x.com", + "timestamp": "2026-05-08T00:00:00Z", + "payload": peer_payload, + }, + separators=(",", ":"), + ) + "\n" + backend = FakeBackend(prepopulate={"peer@x.com.jsonl": peer_event.encode()}) + + events_dir = tmp_path / "events" + local_dir = tmp_path / "local" + adapter, inner = _build(events_dir, local_dir, "alice@x.com", backend) + + await adapter.connect() + + assert backend.pull_calls == 1 + decisions = await inner.get_all_decisions() + intents = [d.get("description", "") for d in decisions] + assert any("peer-intent" in i for i in intents), intents + + +@pytest.mark.asyncio +async def test_write_marks_dirty_then_flush_pushes(tmp_path: Path) -> None: + backend = FakeBackend() + events_dir = tmp_path / "events" + local_dir = tmp_path / "local" + adapter, _ = _build(events_dir, local_dir, "alice@x.com", backend) + + await adapter.connect() + await adapter.ingest_payload(_payload("alice-intent", "src-1")) + + # First flush: pushes alice's file. + await adapter.flush_to_backend() + assert len(backend.push_calls) == 1 + pushed_path, pushed_name = backend.push_calls[0] + assert pushed_name == "alice@x.com.jsonl" + assert pushed_path.name == "alice@x.com.jsonl" + + # Second flush with no intervening writes: no-op. + await adapter.flush_to_backend() + assert len(backend.push_calls) == 1 + + +@pytest.mark.asyncio +async def test_no_backend_means_no_push_no_pull(tmp_path: Path) -> None: + events_dir = tmp_path / "events" + local_dir = tmp_path / "local" + adapter, _ = _build(events_dir, local_dir, "alice@x.com", backend=None) + + await adapter.connect() + await adapter.ingest_payload(_payload("solo-intent", "src-1")) + await adapter.flush_to_backend() # must not raise diff --git a/tests/test_team_round_trip_local_folder.py b/tests/test_team_round_trip_local_folder.py new file mode 100644 index 00000000..0bd7eacf --- /dev/null +++ b/tests/test_team_round_trip_local_folder.py @@ -0,0 +1,106 @@ +"""Phase 1 integration: two-author round-trip via LocalFolderAdapter (#277).""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from events.backends.local_folder import LocalFolderAdapter +from events.materializer import EventMaterializer +from events.team_adapter import TeamWriteAdapter +from events.writer import EventFileWriter +from ledger.adapter import SurrealDBLedgerAdapter + + +def _payload(intent: str, source_ref: str) -> dict: + return { + "query": intent, + "repo": "test-repo", + "commit_hash": "deadbeef00000000000000000000000000000000", + "analyzed_at": "2026-04-29T12:00:00Z", + "mappings": [ + { + "span": { + "span_id": f"span-{source_ref}", + "source_type": "transcript", + "text": intent, + "speaker": "Tester", + "source_ref": source_ref, + }, + "intent": intent, + "symbols": [], + "code_regions": [], + "dependency_edges": [], + } + ], + } + + +def _build(events_dir: Path, local_dir: Path, author: str, remote_root: Path) -> TeamWriteAdapter: + inner = SurrealDBLedgerAdapter(url="memory://") + writer = EventFileWriter(events_dir, author) + materializer = EventMaterializer(events_dir, local_dir) + backend = LocalFolderAdapter(remote_root=remote_root, author=author) + return TeamWriteAdapter(inner, writer, materializer, backend=backend) + + +@pytest.mark.asyncio +async def test_two_authors_round_trip(tmp_path: Path) -> None: + """A ingests → flushes → B connects → B's inner DB has A's decision.""" + remote_root = tmp_path / "remote" + remote_root.mkdir() + + a = _build( + events_dir=tmp_path / "a-events", + local_dir=tmp_path / "a-local", + author="alice@x.com", + remote_root=remote_root, + ) + b = _build( + events_dir=tmp_path / "b-events", + local_dir=tmp_path / "b-local", + author="bob@x.com", + remote_root=remote_root, + ) + + await a.connect() + await a.ingest_payload(_payload("alice-shared-intent", "src-A")) + await a.flush_to_backend() + + await b.connect() + decisions_b = await b._inner.get_all_decisions() + descriptions = [d.get("description", "") for d in decisions_b] + assert any("alice-shared-intent" in d for d in descriptions), descriptions + + +@pytest.mark.asyncio +async def test_pull_idempotent_across_invocations(tmp_path: Path) -> None: + """Re-pulling without remote change is a no-op (file mtime stable).""" + remote_root = tmp_path / "remote" + remote_root.mkdir() + + a = _build( + events_dir=tmp_path / "a-events", + local_dir=tmp_path / "a-local", + author="alice@x.com", + remote_root=remote_root, + ) + b = _build( + events_dir=tmp_path / "b-events", + local_dir=tmp_path / "b-local", + author="bob@x.com", + remote_root=remote_root, + ) + + await a.connect() + await a.ingest_payload(_payload("alice-once", "src-A")) + await a.flush_to_backend() + + await b.connect() + pulled_path = (tmp_path / "b-events") / "alice@x.com.jsonl" + first_mtime = pulled_path.stat().st_mtime_ns + + # Second pull on B with no remote change → mtime unchanged. + await b._backend.pull_events(b._writer.events_dir, since_token=None) + assert pulled_path.stat().st_mtime_ns == first_mtime diff --git a/thoughts/shared/plans/2026-05-08-remote-event-log-adapter.md b/thoughts/shared/plans/2026-05-08-remote-event-log-adapter.md new file mode 100644 index 00000000..ee1fb699 --- /dev/null +++ b/thoughts/shared/plans/2026-05-08-remote-event-log-adapter.md @@ -0,0 +1,448 @@ +# Plan: Remote event-log adapter (issue #277) + +**change_class**: feature + +**doc_tier**: standard + +**terms_introduced**: +- term: BackendAdapter + home: events/backends/__init__.py +- term: LocalFolderAdapter + home: events/backends/local_folder.py +- term: GoogleDriveAdapter + home: events/backends/google_drive.py +- term: remote_root + home: docs/team-mode-setup.md +- term: Create (shared ledger) + home: docs/team-mode-setup.md +- term: Join (shared ledger) + home: docs/team-mode-setup.md +- term: founding member + home: docs/team-mode-setup.md +- term: shared ledger + home: docs/team-mode-setup.md + +**boundaries**: +- limitations: + - Pull-only sync. No webhook receivers, no daemons, no background polling. + - One remote folder per repo. Multi-tenant orchestration is out of scope. + - Conflict resolution is by ordering (timestamp) + DB-level `canonical_id` UNIQUE; no merge UI. + - Sync cadence is on-invocation only (with a short TTL cache to avoid hammering remote on every tool call). + - GoogleDriveAdapter targets a single shared Drive folder; no Shared Drive (Team Drive) special-casing in v0. +- non_goals: + - S3, Dropbox, OneDrive adapters. Interface is designed for them; bodies are P1. + - Encryption at rest beyond what the backend provides. + - Real-time push or change notifications. +- exclusions: + - Mutating peer event files. Each author writes ONLY to `.jsonl`. Pull is read-only with respect to peer files; we replace local copies with remote contents. + +## Open Questions + +1. **Sync TTL on `pull_events`** — proposal: 30 s in-process cache keyed on `(remote_root, repo_path)`. Rationale: a session with a tight tool-call loop shouldn't make 20 Drive API calls per minute, but a multi-minute coding session should pick up peer events. Alternative is `pull_events` always runs and the LocalFolderAdapter is fast enough that it doesn't matter — but Drive is not. +2. **`push_events` cadence** — proposal: push the author's own JSONL file once per tool-call lifecycle (at end of `TeamWriteAdapter._ensure_ready` if any writes happened in this process, OR explicitly at end of `handle_*` for write tools). Per-event push is rejected (Drive rate limits + adds latency to every write). +3. **Drive OAuth redirect** — proposal: localhost loopback (`http://localhost:/callback`) following the standard `google-auth-oauthlib.flow.InstalledAppFlow.run_local_server()` pattern. No public callback URL needed. +4. **Lock semantics** — proposal: `lock(remote_path)` returns a context manager that's a no-op for LocalFolderAdapter when `remote_path == events_dir/{my-email}.jsonl` (same-author writes serialize via the existing fcntl lock in `events/writer.py`); GoogleDriveAdapter uses a sentinel file (`.lock`) with last-writer-wins semantics. Caller is expected to retry-on-conflict, not block. For v0, no caller actually invokes `lock()` — it exists in the protocol so future writers (e.g., a second machine for the same author) can opt in. + +## Phase 1: BackendAdapter protocol + LocalFolderAdapter + wire into TeamWriteAdapter + +### Affected Files + +- `events/backends/__init__.py` (new) — `BackendAdapter` ABC; module-level factory `get_backend(config) -> BackendAdapter | None` +- `events/backends/local_folder.py` (new) — `LocalFolderAdapter` implementing the protocol against a shared filesystem path +- `events/team_adapter.py` — accept optional `backend: BackendAdapter | None`; in `connect()` call `backend.pull_events()` BEFORE `materializer.replay_new_events()`; after every `_writer.write(...)` set a "dirty" flag; expose an `async def flush_to_backend()` that uploads the author's file when dirty +- `events/materializer.py` — no change to the body; the contract that "files in `events_dir` are the source of truth" is preserved +- `adapters/ledger.py` — read `team.backend` from `.bicameral/config.yaml`; if present, construct the corresponding `BackendAdapter` and pass into `TeamWriteAdapter` +- `handlers/sync_middleware.py` — add `async def ensure_team_synced(ctx)` (TTL-cached pull); call it from the same dispatch site as `ensure_ledger_synced`; call `flush_to_backend()` after handler completion for write tools +- `server.py` — wire `ensure_team_synced` and the post-handler flush at the dispatch site (mirror existing `ensure_ledger_synced` placement) + +### Changes + +**`events/backends/__init__.py`** — small ABC, four operations, no JSONL knowledge: + +```python +from __future__ import annotations +from abc import ABC, abstractmethod +from contextlib import AbstractAsyncContextManager +from pathlib import Path +from typing import AsyncIterator + +class BackendAdapter(ABC): + """Move per-author event files between local cache and a remote root. + + Knows nothing about JSONL contents — pure file transport. The remote + root is a flat directory of .jsonl files (one per peer) + plus optional .lock sentinel files. + """ + + @abstractmethod + async def push_events(self, local_path: Path, remote_name: str) -> None: + """Upload local_path to /. Idempotent + (skip when remote hash matches local).""" + + @abstractmethod + async def pull_events(self, local_dir: Path, since_token: str | None) -> str: + """Download every .jsonl in remote_root into local_dir, + skipping the caller's own file. Returns an opaque token the caller + passes back next time to enable since-cursor optimization (backends + free to ignore and return ""). Idempotent.""" + + @abstractmethod + def lock(self, remote_name: str) -> AbstractAsyncContextManager[None]: + """Best-effort write lock. Caller must handle races on its own.""" + + @abstractmethod + async def list_peers(self) -> AsyncIterator[str]: + """Yield for every peer file in remote_root.""" + +def get_backend(config: dict) -> "BackendAdapter | None": + backend_kind = (config.get("team") or {}).get("backend") + if backend_kind == "local_folder": + from .local_folder import LocalFolderAdapter + return LocalFolderAdapter(remote_root=Path(config["team"]["remote_root"]), + author=config["team"]["author"]) + if backend_kind == "google_drive": + from .google_drive import GoogleDriveAdapter + return GoogleDriveAdapter(folder_id=config["team"]["folder_id"], + author=config["team"]["author"]) + return None +``` + +**`events/backends/local_folder.py`** — minimal real implementation: + +- `push_events(local_path, remote_name)`: compute `sha256(local_path.read_bytes())`; if `(remote_root / remote_name).exists()` and its hash matches, return; else `shutil.copy2`. +- `pull_events(local_dir, since_token)`: iterate `remote_root.glob("*.jsonl")`; for each `peer.jsonl != f"{author}.jsonl"`, hash-compare with `local_dir / peer.jsonl`, copy when different. Returns `""` (no since-token in v0). +- `lock(remote_name)`: returns an async context manager that opens `remote_root / f"{remote_name}.lock"` with fcntl exclusive lock (POSIX) / msvcrt (Windows); no-op if remote_root is not on a filesystem that supports flock (degrade gracefully). +- `list_peers()`: yield `path.stem` for each `*.jsonl` in remote_root. + +**`events/team_adapter.py`** — accept backend, drive pull/push lifecycle: + +```python +def __init__(self, inner, writer, materializer, backend=None): + self._inner = inner + self._writer = writer + self._materializer = materializer + self._backend = backend + self._dirty = False + self._ready = False + +async def connect(self): + await self._inner.connect() + if self._backend is not None: + await self._backend.pull_events( + self._writer.events_dir, since_token=None) + replayed = await self._materializer.replay_new_events(self._inner) + ... + self._ready = True + +# After every self._writer.write(...) in existing methods, set: +# self._dirty = True + +async def flush_to_backend(self): + """Push the author's JSONL to remote if any write happened since last flush.""" + if self._backend is None or not self._dirty: + return + await self._backend.push_events( + self._writer.path, remote_name=self._writer.path.name) + self._dirty = False +``` + +**`adapters/ledger.py`** — refactor `_read_collaboration_mode` and extend the team-mode branch: + +Refactor: replace the existing `_read_collaboration_mode(repo_path) -> str` with `_read_team_config(repo_path) -> dict` that returns the full parsed config (or `{"mode": "solo"}` when no config). Update its single existing caller (`get_ledger`) to `cfg = _read_team_config(repo_path); mode = cfg.get("mode", "solo")`. The existing string-only contract is internal — no external callers grep across the tree (verified). + +```python +def _read_team_config(repo_path: str) -> dict: + """Read .bicameral/config.yaml as a dict. Returns {"mode": "solo"} if absent.""" + data_path = os.getenv("BICAMERAL_DATA_PATH", repo_path) + config_path = Path(data_path) / ".bicameral" / "config.yaml" + if not config_path.exists(): + return {"mode": "solo"} + try: + import yaml + return yaml.safe_load(config_path.read_text(encoding="utf-8")) or {"mode": "solo"} + except Exception: + return {"mode": "solo"} + +# In get_ledger(): +if mode == "team": + ... + cfg = _read_team_config(repo_path) + cfg.setdefault("team", {})["author"] = author + from events.backends import get_backend + backend = get_backend(cfg) + _real_ledger_instance = TeamWriteAdapter(inner, writer, materializer, backend=backend) +``` + +**`handlers/sync_middleware.py`** — add team-sync companion to `ensure_ledger_synced`: + +```python +_LAST_TEAM_PULL_AT: dict[str, float] = {} # repo_path -> monotonic ts +_TEAM_PULL_TTL_S = 30.0 + +async def ensure_team_synced(ctx) -> None: + """Pull peer events from the team backend, TTL-cached per repo.""" + ledger = getattr(ctx, "ledger", None) + if ledger is None or not hasattr(ledger, "_backend") or ledger._backend is None: + return + repo = getattr(ctx, "repo_path", "") or "." + now = time.monotonic() + last = _LAST_TEAM_PULL_AT.get(repo, 0.0) + if now - last < _TEAM_PULL_TTL_S: + return + try: + await ledger._backend.pull_events( + ledger._writer.events_dir, since_token=None) + await ledger._materializer.replay_new_events(ledger._inner) + _LAST_TEAM_PULL_AT[repo] = now + except Exception as exc: + logger.debug("[sync_middleware] team pull failed: %s", exc) + +async def flush_team_writes(ctx) -> None: + ledger = getattr(ctx, "ledger", None) + if ledger is not None and hasattr(ledger, "flush_to_backend"): + try: + await ledger.flush_to_backend() + except Exception as exc: + logger.debug("[sync_middleware] team flush failed: %s", exc) +``` + +**`server.py`** — at the dispatch site that already calls `ensure_ledger_synced(ctx)`, add `await ensure_team_synced(ctx)` immediately after; in the `finally` of the dispatch body, add `await flush_team_writes(ctx)`. (Exact line numbers determined during implementation; mirror the existing pattern.) + +### Unit Tests + +- `tests/test_backends_local_folder.py` (new): + - `test_push_uploads_when_remote_missing` — write local file, push, assert remote bytes match. + - `test_push_skips_when_remote_hash_matches` — push twice, assert second push does NOT modify remote mtime (or assert via spy that `shutil.copy2` was called once). + - `test_pull_copies_peer_files_only` — populate remote with `mine.jsonl` and `peer.jsonl`; pull as `mine`; assert `peer.jsonl` arrived, `mine.jsonl` was NOT overwritten. + - `test_pull_skips_unchanged` — second pull with no remote change is a no-op (file mtime unchanged). + - `test_list_peers_yields_email_stems` — populate remote with three `*.jsonl`; assert `list_peers()` yields the three stems. + - `test_lock_serializes_concurrent_acquirers` — two coroutines acquire same `lock(name)`; assert ordering is preserved (second waits for first). + +- `tests/test_team_adapter_with_backend.py` (new): + - `test_connect_pulls_then_replays` — fake backend yields a peer event file; `await adapter.connect()`; assert the inner adapter saw the peer's `ingest_payload(...)` call. + - `test_write_marks_dirty_then_flush_pushes` — call `ingest_payload`; assert `flush_to_backend()` invokes `backend.push_events(self._writer.path, ...)`; second flush without writes is a no-op. + - `test_no_backend_means_no_push_no_pull` — `backend=None`; full mutation cycle works; no errors raised. + +- `tests/test_team_round_trip_local_folder.py` (new — integration): + - `test_two_authors_round_trip` — spin up two `TeamWriteAdapter` instances pointing at the SAME `remote_root` but different `events_dir` and different author emails; author A ingests a decision, calls `flush_to_backend()`; author B calls `connect()` (which triggers pull + replay); assert author B's inner adapter has the decision. + - `test_pull_idempotent_across_invocations` — same scenario, run B's `ensure_team_synced` cycle three times; assert only the FIRST pull caused a replay. + +- `tests/test_sync_middleware_team.py` (new): + - `test_ensure_team_synced_ttl_cache` — first call hits backend; second call within 30 s does not; third call after TTL hits again. Use a stub clock. + - `test_ensure_team_synced_no_backend_is_noop` — solo-mode ledger; `ensure_team_synced` returns without error and without calling anything. + - `test_flush_team_writes_swallows_backend_errors` — backend.push raises; `flush_team_writes` does not propagate (logs at DEBUG). + +## Phase 2: GoogleDriveAdapter + OAuth flow + +### Affected Files + +- `events/backends/google_drive.py` (new) — `GoogleDriveAdapter` implementing `BackendAdapter` against the Drive Files API +- `requirements.txt` — add `google-auth-oauthlib>=1.2`, `google-api-python-client>=2.100` +- `pyproject.toml` — same additions under `[project.dependencies]` (or whichever group `requirements.txt` mirrors) + +#### OAuth client provisioning (security alignment) + +The Bicameral repo is public. We do NOT bundle a default OAuth client_id/client_secret in source. Two configuration paths, in priority order: + +1. **Operator-supplied via env** — `BICAMERAL_GDRIVE_CLIENT_ID` and `BICAMERAL_GDRIVE_CLIENT_SECRET`. Highest priority. Suitable for CI / scripted setup. +2. **Operator-supplied via file** — `~/.bicameral/google-drive-client.json` (the JSON client config exported from Google Cloud Console for an "OAuth client ID" of type "Desktop app"). The setup wizard documents how to obtain this file in `docs/team-mode-setup.md` (3 minutes: create GCP project → enable Drive API → create OAuth consent screen → download credentials JSON). + +If neither is present, `GoogleDriveAdapter._credentials()` raises `MissingOAuthClientError` with the exact remediation text. There is NO bundled default — the operator must explicitly provision a client. This aligns with the existing `signer_email_fallback` policy in `events/writer.py` (privacy-positive defaults; explicit opt-in for anything that emits identity to a remote system). + +### Changes + +**`events/backends/google_drive.py`**: + +- Constructor: `(folder_id: str, author: str, token_path: Path = ~/.bicameral/google-drive-token.json)`. +- `_credentials()` — load cached token from `token_path`, refresh on expiry. If no cached token, resolve OAuth client per the provisioning rules above (env first, then `~/.bicameral/google-drive-client.json`, else raise `MissingOAuthClientError`); then `InstalledAppFlow.from_client_config(client_config, scopes=["https://www.googleapis.com/auth/drive.file"]).run_local_server(port=0)`. The narrowest-possible scope (`drive.file`) limits Bicameral's access to files it created/opened — the operator's other Drive content stays invisible. +- `_files_service()` — cached `googleapiclient.discovery.build("drive", "v3", credentials=...)`. +- `push_events(local_path, remote_name)`: + - Query: `files().list(q=f"'{folder_id}' in parents and name='{remote_name}'", fields="files(id, md5Checksum)")` + - If found and `md5Checksum == md5(local_path.read_bytes())`: return (no-op). + - If found but hash differs: `files().update(fileId=..., media_body=MediaFileUpload(local_path))`. + - If not found: `files().create(body={"name": remote_name, "parents": [folder_id]}, media_body=MediaFileUpload(local_path))`. +- `pull_events(local_dir, since_token)`: + - List with optional `q=f"... and modifiedTime > '{since_token}'"` when token is present. + - For each remote file whose name is `*.jsonl` and `!= f"{author}.jsonl"`: download via `files().get_media(fileId=...).execute()`; write to `local_dir / name` only if md5 differs from local. + - Return the most recent `modifiedTime` seen as the new since_token. +- `lock(remote_name)`: async context manager that `create`s `.lock` (best-effort; if create fails because file exists, retry-on-conflict semantics — caller decides). Releases by `delete()` on exit. +- `list_peers()`: list folder, yield `name.removesuffix(".jsonl")` for `*.jsonl` files. + +### Unit Tests + +- `tests/test_backends_google_drive_unit.py` (new — uses `unittest.mock` to stub the Drive client; no network): + - `test_push_skips_when_md5_matches` — stub `files().list()` to return `[{"id": "x", "md5Checksum": EXPECTED}]`; push; assert `files().update` and `files().create` were never called. + - `test_push_updates_when_md5_differs` — stub list to return mismatching md5; assert `files().update` called once with the right `fileId`. + - `test_push_creates_when_remote_missing` — stub list to return `[]`; assert `files().create` called once. + - `test_pull_writes_only_changed_peer_files` — stub list to return three peer files (one matches local md5, two differ); assert only two downloads occurred and own-file was skipped. + - `test_pull_returns_max_modified_time_as_token` — stub list with three files of different `modifiedTime`; assert returned token equals the max. + - `test_lock_create_then_delete` — assert lock entry creates `.lock`, releases by deleting it; raised exception inside the `async with` still triggers delete. + +- `tests/test_backends_google_drive_integration.py` (new — `pytest.mark.integration`, gated on `BICAMERAL_GDRIVE_TEST_FOLDER` + `BICAMERAL_GDRIVE_TEST_TOKEN` env vars; skipped in CI by default): + - `test_round_trip_against_real_folder` — push a 5-line JSONL; pull it back from a different `local_dir`; assert byte-identical. + +## Phase 3: Setup wizard prompt + docs + +### Affected Files + +- `setup_wizard.py` — extend the existing team-mode branch with a Create-vs-Join wizard for shared ledgers. Adds `_select_team_backend`, `_create_shared_ledger_drive`, `_join_shared_ledger_drive`, and `_select_local_folder_backend` helpers. Writes `team.backend`, `team.folder_id` (or `team.remote_root`), `team.role` (`founding_member` | `member`) to `.bicameral/config.yaml`. +- `docs/team-mode-setup.md` (new) — operator-facing how-to (Create vs Join, OAuth client provisioning, security model) +- `README.md` — under "What `setup` writes", add a bullet describing the new `team.backend` / `team.folder_id` / `team.role` keys + +### UX flow (the contract) + +After the operator selects "Team" in `_select_collaboration_mode`, the wizard branches: + +``` +Team mode selected + │ + ▼ +"How do you want to set up the shared ledger?" + ├── Create new shared ledger → Create branch + ├── Join existing shared ledger → Join branch + └── Use a shared filesystem instead (advanced) → LocalFolder branch +``` + +#### Create branch (founding member) + +1. "Where will the shared ledger live?" → currently only `Google Drive` (single-option list, future-proof). +2. **OAuth client check** — verify `BICAMERAL_GDRIVE_CLIENT_ID/SECRET` env OR `~/.bicameral/google-drive-client.json` is present. If not, surface the 3-minute GCP setup blurb with link to `docs/team-mode-setup.md` §"Provision an OAuth client" and exit (operator re-runs setup after). +3. **OAuth flow** — `GoogleDriveAdapter._credentials()` runs the local-loopback flow with scope `drive.file`. Token cached at `~/.bicameral/google-drive-token.json` (mode 0600). +4. **Folder creation** — call Drive API `files().create(body={"name": f"bicameral-{repo_basename}-ledger", "mimeType": "application/vnd.google-apps.folder"})`. Capture the new folder ID. +5. **Sharing instructions** — print the folder URL (`https://drive.google.com/drive/folders/`) and the literal text the founding member should send teammates: "Share this folder with your teammates as Editor. They run `bicameral setup`, choose Join, and paste the folder ID: ``." Do NOT auto-share — the founding member's Google account governs ACL. +6. **Persist** — write to `.bicameral/config.yaml`: + ```yaml + mode: team + team: + backend: google_drive + folder_id: + role: founding_member + ``` + +#### Join branch (subsequent member) + +1. "Paste the shared ledger folder ID (your teammate sent this to you):" → `prompt_text`. Accept either a raw ID or a full Drive URL (extract the ID via regex). +2. **OAuth client check** — same as Create. +3. **OAuth flow** — same as Create. +4. **Verify access** — call `files().get(fileId=folder_id, fields="id,name,capabilities")`. If 404, error with "Folder not found — check the ID, or ask the founding member to share it with your Google account." If `capabilities.canEdit == False`, error with "You have read-only access — ask the founding member to grant Editor." Both are blocking; do not persist on failure. +5. **Identity check** — read git `user.email` for the repo. Print: "You'll appear in the team ledger as `` (per your `signer_email_fallback: ` policy in events/writer.py). Continue? [y/N]". Default no — operator must affirm. This makes the privacy posture an explicit decision at join time rather than a silent ledger emission. +6. **Persist**: + ```yaml + mode: team + team: + backend: google_drive + folder_id: + role: member + ``` + +#### LocalFolder branch (advanced) + +Single prompt: "Path to a shared folder mounted on every teammate's machine (NFS, Dropbox, syncthing, ...):". Validate the path exists and is writable. Persist: +```yaml +mode: team +team: + backend: local_folder + remote_root: + role: member +``` + +(No Create/Join distinction here — file-system ACLs handle it; whoever can write to the folder is in the team.) + +### Changes + +```python +def _select_team_backend(repo_path: str) -> dict: + """Top-level Create vs Join vs LocalFolder dispatch. Returns team config dict.""" + intent = prompt_choice( + "How do you want to set up the shared ledger?", + choices=[ + ("create", "Create a new shared ledger (you become the founding member)"), + ("join", "Join an existing shared ledger (paste a folder ID from a teammate)"), + ("local_folder", "Use a shared filesystem instead (NFS, Dropbox, syncthing) — advanced"), + ], + default="create", + ) + if intent == "create": + return _create_shared_ledger_drive(repo_path) + if intent == "join": + return _join_shared_ledger_drive(repo_path) + return _select_local_folder_backend() + +def _create_shared_ledger_drive(repo_path: str) -> dict: + _check_oauth_client_or_exit() + from events.backends.google_drive import GoogleDriveAdapter + adapter = GoogleDriveAdapter(folder_id=None, author=_resolved_signer(repo_path)) + adapter._credentials() # runs OAuth, caches token + repo_name = Path(repo_path).resolve().name + folder_id = adapter.create_folder(name=f"bicameral-{repo_name}-ledger") + print(_share_instructions(folder_id)) # prints the URL + paste-this-to-teammates text + return {"backend": "google_drive", "folder_id": folder_id, "role": "founding_member"} + +def _join_shared_ledger_drive(repo_path: str) -> dict: + raw = prompt_text("Paste the shared ledger folder ID (or full Drive URL):") + folder_id = _extract_folder_id(raw) # accepts either form + _check_oauth_client_or_exit() + from events.backends.google_drive import GoogleDriveAdapter + adapter = GoogleDriveAdapter(folder_id=folder_id, author=_resolved_signer(repo_path)) + adapter._credentials() + adapter.verify_access() # raises FolderNotFoundError or ReadOnlyAccessError + signer = _resolved_signer(repo_path) + if not prompt_yes_no( + f"You'll appear in the team ledger as `{signer}`. Continue?", default=False + ): + sys.exit("Aborted — adjust signer_email_fallback in your existing config and re-run.") + return {"backend": "google_drive", "folder_id": folder_id, "role": "member"} + +def _select_local_folder_backend() -> dict: + path = prompt_text("Path to the shared folder (must exist on every teammate's machine):") + p = Path(path).expanduser().resolve() + if not p.exists() or not os.access(p, os.W_OK): + sys.exit(f"Path not writable: {p}") + return {"backend": "local_folder", "remote_root": str(p), "role": "member"} +``` + +`GoogleDriveAdapter` gains two helpers (added to Phase 2 scope as a follow-on): +- `create_folder(name) -> str` — returns the new folder ID. +- `verify_access() -> None` — raises `FolderNotFoundError` (404) or `ReadOnlyAccessError` (`canEdit == False`). + +### Documentation (`docs/team-mode-setup.md`) + +Sections in order: + +1. **What is team mode** — solo vs team in one paragraph; what "shared ledger" means. +2. **Create vs Join** — table: when each applies; what the founding member is responsible for (folder ACL, OAuth client provisioning). +3. **Provision an OAuth client (3 minutes)** — step-by-step GCP screenshots-equivalent prose: create project, enable Drive API, create OAuth consent screen (External, test users), create credentials (Desktop app), download JSON, save as `~/.bicameral/google-drive-client.json` OR export as env vars. Why we don't bundle: "Bicameral is open source; bundling our own OAuth client lets anyone publish a 'Bicameral' consent screen and harvest scopes." +4. **Run setup — Create flow** — terminal transcript walkthrough. +5. **Run setup — Join flow** — terminal transcript walkthrough; emphasize the identity confirmation step. +6. **Verifying replication** — operator A ingests a decision; operator B runs `bicameral.history` within 60 s; should appear. +7. **Permissions and revocation** — Drive Editor required to write; revoking Editor stops new pushes immediately, but past events remain in peers' local DBs (event log is append-only). +8. **Privacy posture** — explain `signer_email_fallback`, what the JSONL author field carries, where the OAuth token lives (`~/.bicameral/google-drive-token.json`, mode 0600). +9. **Local-folder backend** — when to use it (all-on-NFS shops); single section because the wizard is one prompt. +10. **Troubleshooting** — common failures: 404 on Join (sharing not propagated), OAuth refresh failures (token deleted; re-run setup), folder ID format mismatch. + +### Unit Tests + +- `tests/test_setup_wizard_team_backend.py` (new): + - `test_create_branch_persists_founding_member_role` — stub `_check_oauth_client_or_exit`, stub `GoogleDriveAdapter._credentials` and `.create_folder` to return `"abc123"`; drive wizard with `intent="create"`; assert config.yaml has `team.backend: google_drive`, `team.folder_id: abc123`, `team.role: founding_member`. + - `test_join_branch_verifies_access_before_persist` — stub `verify_access` to raise `FolderNotFoundError`; assert wizard exits non-zero AND no config.yaml is written. + - `test_join_branch_extracts_folder_id_from_url` — pass `https://drive.google.com/drive/folders/xyz789?usp=sharing` to the prompt stub; assert persisted `folder_id == "xyz789"`. + - `test_join_branch_aborts_on_identity_decline` — stub identity confirmation prompt to return False; assert SystemExit and no config write. + - `test_local_folder_branch_rejects_unwritable_path` — pass a path the test process cannot write to (e.g. `/`); assert SystemExit with the path in the message. + - `test_oauth_client_missing_blocks_create_and_join` — env unset, no `~/.bicameral/google-drive-client.json`; both Create and Join branches surface the remediation message and exit cleanly (no partial config write). + +- `tests/test_google_drive_adapter_helpers.py` (new — Phase 3 additions to the Phase 2 adapter): + - `test_create_folder_returns_id` — stub `files().create()` to return `{"id": "new123"}`; assert `create_folder("bicameral-foo-ledger") == "new123"`. + - `test_verify_access_raises_on_404` — stub `files().get()` to raise `HttpError(404)`; assert `FolderNotFoundError` raised with the folder ID in the message. + - `test_verify_access_raises_on_read_only` — stub to return `{"capabilities": {"canEdit": False}}`; assert `ReadOnlyAccessError`. + - `test_verify_access_passes_when_can_edit` — stub to return `{"capabilities": {"canEdit": True}}`; assert no exception. + +- (No new tests for `docs/team-mode-setup.md` or `README.md` — content review only.) + +## CI Commands + +- `cd pilot/mcp && pytest tests/test_backends_local_folder.py tests/test_team_adapter_with_backend.py tests/test_team_round_trip_local_folder.py tests/test_sync_middleware_team.py -v` — Phase 1 unit + integration (no network) +- `cd pilot/mcp && pytest tests/test_backends_google_drive_unit.py -v` — Phase 2 unit (mocked Drive client) +- `cd pilot/mcp && pytest tests/test_backends_google_drive_integration.py -v -m integration` — Phase 2 integration (gated on env vars; skipped in CI by default) +- `cd pilot/mcp && pytest tests/test_setup_wizard_team_backend.py tests/test_google_drive_adapter_helpers.py -v` — Phase 3 wizard + adapter helpers +- `cd pilot/mcp && ruff check events/ adapters/ handlers/sync_middleware.py setup_wizard.py` — lint matches CI +- `cd pilot/mcp && pytest tests/ -k 'team or events or sync_middleware'` — regression sweep across the touched surface From a366cb535ec94f80ddc791985eba72641a219712 Mon Sep 17 00:00:00 2001 From: jinhongkuan Date: Fri, 8 May 2026 22:56:29 -0700 Subject: [PATCH 2/3] style: ruff format the #277 surface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pure formatting — `ruff format` against the 10 files touched in #277. No semantic changes. CI's `ruff format --check .` now passes. Co-Authored-By: Claude Opus 4.7 (1M context) --- adapters/ledger.py | 4 +-- events/backends/__init__.py | 2 ++ events/backends/google_drive.py | 15 +++------- events/team_adapter.py | 8 ++---- setup_wizard.py | 26 +++++++++++++----- tests/test_backends_google_drive_unit.py | 35 +++++++++++++++++------- tests/test_backends_local_folder.py | 1 + tests/test_setup_wizard_team_backend.py | 27 ++++++------------ tests/test_sync_middleware_team.py | 4 ++- tests/test_team_adapter_with_backend.py | 27 ++++++++++-------- 10 files changed, 82 insertions(+), 67 deletions(-) diff --git a/adapters/ledger.py b/adapters/ledger.py index 5fff795f..cabab39b 100644 --- a/adapters/ledger.py +++ b/adapters/ledger.py @@ -95,9 +95,7 @@ def get_ledger(): ) backend = None - _real_ledger_instance = TeamWriteAdapter( - inner, writer, materializer, backend=backend - ) + _real_ledger_instance = TeamWriteAdapter(inner, writer, materializer, backend=backend) backend_kind = (cfg.get("team") or {}).get("backend") or "local-only" logger.info( "[ledger] team mode — events at %s (author: %s, backend: %s)", diff --git a/events/backends/__init__.py b/events/backends/__init__.py index 30839281..d9d594dc 100644 --- a/events/backends/__init__.py +++ b/events/backends/__init__.py @@ -58,8 +58,10 @@ def get_backend(config: dict) -> BackendAdapter | None: author = team.get("author", "") if kind == "local_folder": from .local_folder import LocalFolderAdapter + return LocalFolderAdapter(remote_root=Path(team["remote_root"]), author=author) if kind == "google_drive": from .google_drive import GoogleDriveAdapter + return GoogleDriveAdapter(folder_id=team["folder_id"], author=author) return None diff --git a/events/backends/google_drive.py b/events/backends/google_drive.py index d3c24ea0..cb325e5e 100644 --- a/events/backends/google_drive.py +++ b/events/backends/google_drive.py @@ -91,9 +91,8 @@ def _bundled_client_config() -> dict: project is provisioned. Once Jin replaces the constants, this branch becomes unreachable. """ - if ( - _BUNDLED_CLIENT_ID.startswith(_PLACEHOLDER_PREFIX) - or _BUNDLED_CLIENT_SECRET.startswith(_PLACEHOLDER_PREFIX) + if _BUNDLED_CLIENT_ID.startswith(_PLACEHOLDER_PREFIX) or _BUNDLED_CLIENT_SECRET.startswith( + _PLACEHOLDER_PREFIX ): raise OAuthClientNotProvisionedError( "Bicameral's Google Drive OAuth client isn't published yet. " @@ -143,9 +142,7 @@ def _credentials(self): creds = None if self._token_path.exists(): - creds = Credentials.from_authorized_user_file( - str(self._token_path), [DRIVE_SCOPE] - ) + creds = Credentials.from_authorized_user_file(str(self._token_path), [DRIVE_SCOPE]) if creds and creds.valid: return creds if creds and creds.expired and creds.refresh_token: @@ -300,11 +297,7 @@ def verify_access(self) -> None: creds = self._credentials() svc = _build_drive_service(creds) try: - meta = ( - svc.files() - .get(fileId=self._folder_id, fields="id,capabilities") - .execute() - ) + meta = svc.files().get(fileId=self._folder_id, fields="id,capabilities").execute() except HttpError as exc: if getattr(exc, "resp", None) is not None and exc.resp.status == 404: raise FolderNotFoundError( diff --git a/events/team_adapter.py b/events/team_adapter.py index e5ba308d..b34d8b7e 100644 --- a/events/team_adapter.py +++ b/events/team_adapter.py @@ -40,9 +40,7 @@ async def connect(self) -> None: await self._inner.connect() if self._backend is not None: try: - await self._backend.pull_events( - self._writer.events_dir, since_token=None - ) + await self._backend.pull_events(self._writer.events_dir, since_token=None) except Exception as exc: logger.warning("[team] backend pull failed on connect: %s", exc) replayed = await self._materializer.replay_new_events(self._inner) @@ -59,9 +57,7 @@ async def flush_to_backend(self) -> None: """ if self._backend is None or not self._dirty: return - await self._backend.push_events( - self._writer.path, remote_name=self._writer.path.name - ) + await self._backend.push_events(self._writer.path, remote_name=self._writer.path.name) self._dirty = False async def _ensure_ready(self) -> None: diff --git a/setup_wizard.py b/setup_wizard.py index 075c739b..bc9df6a2 100644 --- a/setup_wizard.py +++ b/setup_wizard.py @@ -1030,22 +1030,34 @@ def _print_drive_security_disclosure() -> None: print(border) print() print(f" {bold}What flows where{reset}") - print(f" {_color('•', _ANSI_GREEN)} Decision data (transcripts, payloads) flows your-CLI ↔ Google directly.") + print( + f" {_color('•', _ANSI_GREEN)} Decision data (transcripts, payloads) flows your-CLI ↔ Google directly." + ) print(" Bicameral the company does NOT receive copies. No Bicameral server in the loop.") - print(f" {_color('•', _ANSI_GREEN)} Your OAuth token stays on your machine ({_color('~/.bicameral/google-drive-token.json', _ANSI_CYAN)}, mode 0600).") + print( + f" {_color('•', _ANSI_GREEN)} Your OAuth token stays on your machine ({_color('~/.bicameral/google-drive-token.json', _ANSI_CYAN)}, mode 0600)." + ) print() print(f" {bold}What the `drive.file` scope means for the rest of your Drive{reset}") - print(f" {_color('•', _ANSI_GREEN)} The Bicameral CLI on your machine can only touch files it creates") + print( + f" {_color('•', _ANSI_GREEN)} The Bicameral CLI on your machine can only touch files it creates" + ) print(" in the team folder. Your other Drive content (other folders, Google Docs,") print(" shared files) is invisible to the CLI — Google enforces this.") print() print(f" {bold}What Bicameral the company CAN see (as the OAuth app publisher){reset}") - print(f" {_color('•', _ANSI_YELLOW)} Aggregate API request counts. {_color('Not contents.', _ANSI_BOLD)}") - print(f" {_color('•', _ANSI_YELLOW)} OAuth consent records: which Google accounts authenticated, when.") + print( + f" {_color('•', _ANSI_YELLOW)} Aggregate API request counts. {_color('Not contents.', _ANSI_BOLD)}" + ) + print( + f" {_color('•', _ANSI_YELLOW)} OAuth consent records: which Google accounts authenticated, when." + ) print() print(f" {bold}The trust dependency you're accepting{reset}") print(" Same as any OAuth tool (gh, gcloud, Notion, Slack desktop): you trust") - print(" that the open-source CLI behaves as advertised. Source: github.com/BicameralAI/bicameral-mcp") + print( + " that the open-source CLI behaves as advertised. Source: github.com/BicameralAI/bicameral-mcp" + ) print() print(" Full security model: docs/team-mode-setup.md") print(border) @@ -1103,7 +1115,7 @@ def _create_shared_ledger_drive(repo_path: Path) -> dict: print(" Send this to your teammates so they can Join:") print( f' "Share this folder with my teammate as Editor, then run `bicameral setup` ' - f"and paste this folder ID: {folder_id}\"" + f'and paste this folder ID: {folder_id}"' ) print() return {"backend": "google_drive", "folder_id": folder_id, "role": "founding_member"} diff --git a/tests/test_backends_google_drive_unit.py b/tests/test_backends_google_drive_unit.py index f4688b4c..3b99f53d 100644 --- a/tests/test_backends_google_drive_unit.py +++ b/tests/test_backends_google_drive_unit.py @@ -53,9 +53,7 @@ def stub_credentials(): def _files_list(svc, returned_files): - svc.files.return_value.list.return_value.execute.return_value = { - "files": returned_files - } + svc.files.return_value.list.return_value.execute.return_value = {"files": returned_files} @pytest.mark.asyncio @@ -107,9 +105,7 @@ async def test_push_creates_when_remote_missing(tmp_path: Path, stub_drive, stub @pytest.mark.asyncio -async def test_pull_writes_only_changed_peer_files( - tmp_path: Path, stub_drive, stub_credentials -): +async def test_pull_writes_only_changed_peer_files(tmp_path: Path, stub_drive, stub_credentials): from events.backends.google_drive import GoogleDriveAdapter local_dir = tmp_path / "local" @@ -120,11 +116,27 @@ async def test_pull_writes_only_changed_peer_files( _files_list( stub_drive, [ - {"id": "alice-id", "name": "alice@x.com.jsonl", "md5Checksum": "x", "modifiedTime": "2026-05-08T10:00:00Z"}, - {"id": "bob-id", "name": "bob@x.com.jsonl", "md5Checksum": _md5(bob_existing), "modifiedTime": "2026-05-08T11:00:00Z"}, - {"id": "carol-id", "name": "carol@x.com.jsonl", "md5Checksum": "y", "modifiedTime": "2026-05-08T12:00:00Z"}, + { + "id": "alice-id", + "name": "alice@x.com.jsonl", + "md5Checksum": "x", + "modifiedTime": "2026-05-08T10:00:00Z", + }, + { + "id": "bob-id", + "name": "bob@x.com.jsonl", + "md5Checksum": _md5(bob_existing), + "modifiedTime": "2026-05-08T11:00:00Z", + }, + { + "id": "carol-id", + "name": "carol@x.com.jsonl", + "md5Checksum": "y", + "modifiedTime": "2026-05-08T12:00:00Z", + }, ], ) + # Stub the get_media chain: returns a request whose .execute() returns bytes. def _media_for(fileId): media = MagicMock() @@ -138,7 +150,10 @@ def _media_for(fileId): # Carol is the only peer that should have been downloaded. # Alice is owned (skipped); Bob's md5 matches local (skipped). - downloaded_ids = [c.kwargs.get("fileId") or c.args[0] for c in stub_drive.files.return_value.get_media.call_args_list] + downloaded_ids = [ + c.kwargs.get("fileId") or c.args[0] + for c in stub_drive.files.return_value.get_media.call_args_list + ] assert downloaded_ids == ["carol-id"] assert (local_dir / "carol@x.com.jsonl").read_bytes() == b"new-content-for-carol-id" # Alice's own file must not be created locally diff --git a/tests/test_backends_local_folder.py b/tests/test_backends_local_folder.py index aa6c8a74..2ef7f059 100644 --- a/tests/test_backends_local_folder.py +++ b/tests/test_backends_local_folder.py @@ -41,6 +41,7 @@ async def test_push_skips_when_remote_hash_matches(tmp_path: Path) -> None: # Bump local mtime but keep contents identical → push must be a no-op. import os + os.utime(local, ns=(first_mtime + 1_000_000_000, first_mtime + 1_000_000_000)) await adapter.push_events(local, "alice@x.com.jsonl") diff --git a/tests/test_setup_wizard_team_backend.py b/tests/test_setup_wizard_team_backend.py index 7f633ac1..851c9a82 100644 --- a/tests/test_setup_wizard_team_backend.py +++ b/tests/test_setup_wizard_team_backend.py @@ -31,7 +31,10 @@ def _read_yaml(path: Path) -> dict: def test_extract_folder_id_accepts_raw_id(): from setup_wizard import _extract_folder_id - assert _extract_folder_id("1AbCdEfGhIjKl_mNoPqRsTuVwXyZ-abcd") == "1AbCdEfGhIjKl_mNoPqRsTuVwXyZ-abcd" + assert ( + _extract_folder_id("1AbCdEfGhIjKl_mNoPqRsTuVwXyZ-abcd") + == "1AbCdEfGhIjKl_mNoPqRsTuVwXyZ-abcd" + ) def test_extract_folder_id_accepts_full_url(): @@ -58,9 +61,7 @@ def test_create_branch_persists_founding_member_role(tmp_path: Path, monkeypatch fake_adapter = MagicMock() fake_adapter.create_folder.return_value = "abc123" - with patch( - "events.backends.google_drive.GoogleDriveAdapter", return_value=fake_adapter - ): + with patch("events.backends.google_drive.GoogleDriveAdapter", return_value=fake_adapter): team_cfg = _create_shared_ledger_drive(repo_path=tmp_path) assert team_cfg == { @@ -100,9 +101,7 @@ def test_join_branch_extracts_folder_id_from_url(tmp_path: Path, monkeypatch): monkeypatch.setattr("setup_wizard._prompt_yes_no", lambda *a, **kw: True) fake_adapter = MagicMock() - with patch( - "events.backends.google_drive.GoogleDriveAdapter", return_value=fake_adapter - ): + with patch("events.backends.google_drive.GoogleDriveAdapter", return_value=fake_adapter): team_cfg = _join_shared_ledger_drive(repo_path=tmp_path) assert team_cfg["folder_id"] == "xyz789" @@ -132,9 +131,7 @@ def test_join_branch_verifies_access_before_persist(tmp_path: Path, monkeypatch) fake_adapter = MagicMock() fake_adapter.verify_access.side_effect = FolderNotFoundError("missing-id not found") - with patch( - "events.backends.google_drive.GoogleDriveAdapter", return_value=fake_adapter - ): + with patch("events.backends.google_drive.GoogleDriveAdapter", return_value=fake_adapter): with pytest.raises(SystemExit): _join_shared_ledger_drive(repo_path=tmp_path) @@ -150,9 +147,7 @@ def test_join_branch_aborts_on_identity_decline(tmp_path: Path, monkeypatch): monkeypatch.setattr("setup_wizard._prompt_yes_no", lambda *a, **kw: False) fake_adapter = MagicMock() - with patch( - "events.backends.google_drive.GoogleDriveAdapter", return_value=fake_adapter - ): + with patch("events.backends.google_drive.GoogleDriveAdapter", return_value=fake_adapter): with pytest.raises(SystemExit): _join_shared_ledger_drive(repo_path=tmp_path) @@ -166,9 +161,7 @@ def test_create_aborts_when_security_disclosure_declined(tmp_path: Path, monkeyp monkeypatch.setattr("setup_wizard._prompt_yes_no", lambda *a, **kw: False) fake_adapter = MagicMock() - with patch( - "events.backends.google_drive.GoogleDriveAdapter", return_value=fake_adapter - ): + with patch("events.backends.google_drive.GoogleDriveAdapter", return_value=fake_adapter): with pytest.raises(SystemExit, match="Aborted"): _create_shared_ledger_drive(repo_path=tmp_path) fake_adapter._credentials.assert_not_called() @@ -233,5 +226,3 @@ def test_local_folder_branch_rejects_unwritable_path(tmp_path: Path, monkeypatch monkeypatch.setattr("setup_wizard._prompt_text_or_exit", lambda *a, **kw: "/") with pytest.raises(SystemExit, match="not writable"): _select_local_folder_backend() - - diff --git a/tests/test_sync_middleware_team.py b/tests/test_sync_middleware_team.py index b012ae6f..53f6c49c 100644 --- a/tests/test_sync_middleware_team.py +++ b/tests/test_sync_middleware_team.py @@ -48,7 +48,9 @@ class StubLedger: def __init__(self, backend, materializer) -> None: self._backend = backend self._inner = StubInner() - self._writer = SimpleNamespace(events_dir=Path("/tmp/never-touched"), path=Path("/tmp/x.jsonl")) + self._writer = SimpleNamespace( + events_dir=Path("/tmp/never-touched"), path=Path("/tmp/x.jsonl") + ) self._materializer = materializer self.flush_count = 0 self._raise_on_flush = False diff --git a/tests/test_team_adapter_with_backend.py b/tests/test_team_adapter_with_backend.py index ab1f1011..ff97167a 100644 --- a/tests/test_team_adapter_with_backend.py +++ b/tests/test_team_adapter_with_backend.py @@ -66,7 +66,9 @@ def _payload(intent: str, source_ref: str) -> dict: } -def _build(events_dir: Path, local_dir: Path, author: str, backend) -> tuple[TeamWriteAdapter, SurrealDBLedgerAdapter]: +def _build( + events_dir: Path, local_dir: Path, author: str, backend +) -> tuple[TeamWriteAdapter, SurrealDBLedgerAdapter]: inner = SurrealDBLedgerAdapter(url="memory://") writer = EventFileWriter(events_dir, author) materializer = EventMaterializer(events_dir, local_dir) @@ -81,16 +83,19 @@ async def test_connect_pulls_then_replays(tmp_path: Path) -> None: peer_payload = _payload("peer-intent", "peer-1") import json - peer_event = json.dumps( - { - "schema_version": 2, - "event_type": "ingest.completed", - "author": "peer@x.com", - "timestamp": "2026-05-08T00:00:00Z", - "payload": peer_payload, - }, - separators=(",", ":"), - ) + "\n" + peer_event = ( + json.dumps( + { + "schema_version": 2, + "event_type": "ingest.completed", + "author": "peer@x.com", + "timestamp": "2026-05-08T00:00:00Z", + "payload": peer_payload, + }, + separators=(",", ":"), + ) + + "\n" + ) backend = FakeBackend(prepopulate={"peer@x.com.jsonl": peer_event.encode()}) events_dir = tmp_path / "events" From 901b21d4ce037f1a41b7b26387406d57e8f1f7d6 Mon Sep 17 00:00:00 2001 From: jinhongkuan Date: Fri, 8 May 2026 23:00:14 -0700 Subject: [PATCH 3/3] fix(types): LocalFolder + GoogleDrive adapters formally subclass BackendAdapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mypy was failing on `events/backends/__init__.py:62,66` — the factory's return type is `BackendAdapter | None`, but the two concrete adapters were structurally compatible without declaring inheritance. Added explicit `BackendAdapter` base. Both classes already implemented all four abstract methods (push_events, pull_events, lock, list_peers) — runtime check (issubclass + concrete instantiation) passes. No behavior change. Co-Authored-By: Claude Opus 4.7 (1M context) --- events/backends/google_drive.py | 4 +++- events/backends/local_folder.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/events/backends/google_drive.py b/events/backends/google_drive.py index cb325e5e..b99c17aa 100644 --- a/events/backends/google_drive.py +++ b/events/backends/google_drive.py @@ -30,6 +30,8 @@ from contextlib import asynccontextmanager from pathlib import Path +from . import BackendAdapter + logger = logging.getLogger(__name__) DRIVE_SCOPE = "https://www.googleapis.com/auth/drive.file" @@ -112,7 +114,7 @@ def _bundled_client_config() -> dict: } -class GoogleDriveAdapter: +class GoogleDriveAdapter(BackendAdapter): """BackendAdapter against a single Google Drive folder.""" def __init__( diff --git a/events/backends/local_folder.py b/events/backends/local_folder.py index 90166e44..929cb568 100644 --- a/events/backends/local_folder.py +++ b/events/backends/local_folder.py @@ -14,6 +14,8 @@ from contextlib import asynccontextmanager from pathlib import Path +from . import BackendAdapter + def _sha256_file(path: Path) -> str: h = hashlib.sha256() @@ -23,7 +25,7 @@ def _sha256_file(path: Path) -> str: return h.hexdigest() -class LocalFolderAdapter: +class LocalFolderAdapter(BackendAdapter): """Move per-author event files via a shared filesystem directory. Each author writes only to ``/.jsonl``. Pull