diff --git a/assistant/openapi.yaml b/assistant/openapi.yaml index 2c14c8c35a6..6960fd69453 100644 --- a/assistant/openapi.yaml +++ b/assistant/openapi.yaml @@ -3,7 +3,7 @@ openapi: 3.0.0 info: title: Vellum Assistant API - version: 0.6.1 + version: 0.6.2 description: Auto-generated OpenAPI specification for the Vellum Assistant runtime HTTP server. servers: - url: http://127.0.0.1:7821 diff --git a/assistant/src/__tests__/conversation-confirmation-signals.test.ts b/assistant/src/__tests__/conversation-confirmation-signals.test.ts index 4e6ef2e12af..220a010fc83 100644 --- a/assistant/src/__tests__/conversation-confirmation-signals.test.ts +++ b/assistant/src/__tests__/conversation-confirmation-signals.test.ts @@ -214,6 +214,8 @@ mock.module("../memory/canonical-guardian-store.js", () => ({ // --------------------------------------------------------------------------- import { Conversation } from "../daemon/conversation.js"; +import { HostBashProxy } from "../daemon/host-bash-proxy.js"; +import { HostBrowserProxy } from "../daemon/host-browser-proxy.js"; // --------------------------------------------------------------------------- // Helpers @@ -558,3 +560,156 @@ describe("sendToClient receives state signals", () => { }); }); }); + +describe("restoreBrowserProxyAvailability", () => { + test("re-enables only the host browser proxy after clearProxyAvailability", () => { + const conversation = makeConversation(); + const browserProxy = new HostBrowserProxy(() => {}); + const bashProxy = new HostBashProxy(() => {}); + conversation.setHostBrowserProxy(browserProxy); + conversation.setHostBashProxy(bashProxy); + + // Mark as having a connected client (interactive desktop path). + conversation.updateClient(() => {}, false); + expect(browserProxy.isAvailable()).toBe(true); + expect(bashProxy.isAvailable()).toBe(true); + + // The drain queue clears all proxies for non-interactive turns. 
+ conversation.clearProxyAvailability(); + expect(browserProxy.isAvailable()).toBe(false); + expect(bashProxy.isAvailable()).toBe(false); + + // restoreBrowserProxyAvailability should bring back ONLY the browser proxy. + conversation.restoreBrowserProxyAvailability(); + expect(browserProxy.isAvailable()).toBe(true); + expect(bashProxy.isAvailable()).toBe(false); + }); + + test("re-enables the browser proxy even when hasNoClient is true (chrome-extension)", () => { + // Regression: chrome-extension is non-interactive (hasNoClient stays + // true so host_bash/host_file tools remain gated), but we still need + // to provision the hostBrowserProxy so it can service CDP commands. + // The helper must NOT gate on hasNoClient. + const conversation = makeConversation(); + const browserProxy = new HostBrowserProxy(() => {}); + conversation.setHostBrowserProxy(browserProxy); + + // updateClient with hasNoClient=true emulates the non-interactive + // chrome-extension turn. Host proxies start disabled because + // updateClient propagates hasNoClient through to updateSender. + conversation.updateClient(() => {}, true); + expect(browserProxy.isAvailable()).toBe(false); + expect(conversation["hasNoClient"]).toBe(true); + + // The targeted helper bypasses the hasNoClient gate so the + // single-capability chrome-extension turn can drive the browser + // via CDP without flipping hasNoClient (which would also enable + // host_bash/host_file gating downstream). + conversation.restoreBrowserProxyAvailability(); + expect(browserProxy.isAvailable()).toBe(true); + // hasNoClient itself MUST remain true so that + // isToolActiveForContext keeps host_bash/host_file/host_cu gated. 
+ expect(conversation["hasNoClient"]).toBe(true); + }); + + test("leaves bash/file/cu proxies disabled when called for chrome-extension", () => { + // Regression: the targeted helper must not accidentally re-enable + // proxies other than host_browser, even when called from a path that + // owns multiple proxies (e.g. macOS holdover state with hasNoClient + // forced true for an explicit non-interactive run). + const conversation = makeConversation(); + const browserProxy = new HostBrowserProxy(() => {}); + const bashProxy = new HostBashProxy(() => {}); + conversation.setHostBrowserProxy(browserProxy); + conversation.setHostBashProxy(bashProxy); + + conversation.updateClient(() => {}, true); + expect(browserProxy.isAvailable()).toBe(false); + expect(bashProxy.isAvailable()).toBe(false); + + conversation.restoreBrowserProxyAvailability(); + expect(browserProxy.isAvailable()).toBe(true); + // Crucial: bash proxy stays disabled. The helper must touch ONLY the + // browser proxy. + expect(bashProxy.isAvailable()).toBe(false); + }); + + test("uses hostBrowserSenderOverride when set so drain-queue restores preserve the registry-routed sender", () => { + // Regression (PR #24129 cycle 2): the queue-drain path calls + // `restoreBrowserProxyAvailability()` on dequeue, which used to pass + // `this.sendToClient` (the SSE hub emitter) to the proxy, clobbering the + // chrome-extension registry-routed sender established by the POST + // /messages handler. The override field lets the HTTP handler pin the + // registry-routed sender so the drain path preserves it. + const sseHub: ServerMessage[] = []; + const registry: ServerMessage[] = []; + const conversation = makeConversation((msg) => sseHub.push(msg)); + const browserProxy = new HostBrowserProxy(() => {}); + conversation.setHostBrowserProxy(browserProxy); + + // Simulate updateClient setting sendToClient to the SSE hub and + // marking the conversation as client-less (chrome-extension is + // non-interactive). 
+ conversation.updateClient((msg) => sseHub.push(msg), true); + expect(browserProxy.isAvailable()).toBe(false); + + // The HTTP handler stashes the registry-routed sender as the override. + const registrySender = (msg: ServerMessage) => registry.push(msg); + conversation.hostBrowserSenderOverride = registrySender; + + // Drain-queue path calls restoreBrowserProxyAvailability — it must now + // prefer the override over sendToClient. + conversation.restoreBrowserProxyAvailability(); + expect(browserProxy.isAvailable()).toBe(true); + + // Send a frame through the proxy and verify it flows through the + // registry sender, not the SSE hub. + const internalSend = ( + browserProxy as unknown as { + sendToClient: (msg: ServerMessage) => void; + } + ).sendToClient; + const probe: ServerMessage = { + type: "host_browser_cancel", + requestId: "probe-1", + } as ServerMessage; + internalSend(probe); + expect(registry).toHaveLength(1); + expect(sseHub.some((m) => m === probe)).toBe(false); + }); + + test("falls back to sendToClient when hostBrowserSenderOverride is cleared", () => { + // When a non-chrome-extension turn takes over, the HTTP handler clears + // the override and restoreBrowserProxyAvailability must fall back to + // sendToClient (the SSE hub), otherwise macOS turns would route their + // host_browser frames through the stale chrome-extension registry. + const sseHub: ServerMessage[] = []; + const conversation = makeConversation((msg) => sseHub.push(msg)); + const browserProxy = new HostBrowserProxy(() => {}); + conversation.setHostBrowserProxy(browserProxy); + + // First the chrome-extension path pins the override. + const registry: ServerMessage[] = []; + conversation.hostBrowserSenderOverride = (msg) => registry.push(msg); + conversation.updateClient((msg) => sseHub.push(msg), true); + conversation.restoreBrowserProxyAvailability(); + + // Then a macOS handoff clears the override. 
+ conversation.hostBrowserSenderOverride = undefined; + conversation.updateClient((msg) => sseHub.push(msg), false); + conversation.restoreBrowserProxyAvailability(); + + const internalSend = ( + browserProxy as unknown as { + sendToClient: (msg: ServerMessage) => void; + } + ).sendToClient; + const probe: ServerMessage = { + type: "host_browser_cancel", + requestId: "probe-2", + } as ServerMessage; + internalSend(probe); + expect(sseHub).toContain(probe); + expect(registry).not.toContain(probe); + }); +}); diff --git a/assistant/src/__tests__/conversation-routes-disk-view.test.ts b/assistant/src/__tests__/conversation-routes-disk-view.test.ts index 0659f797f83..776a7f20f87 100644 --- a/assistant/src/__tests__/conversation-routes-disk-view.test.ts +++ b/assistant/src/__tests__/conversation-routes-disk-view.test.ts @@ -188,6 +188,7 @@ function createFakeConversation(conversationId: string): Conversation { setHostCuProxy(this: { hostCuProxy: unknown }, proxy: unknown) { this.hostCuProxy = proxy; }, + restoreBrowserProxyAvailability: () => {}, addPreactivatedSkillId: () => {}, hasAnyPendingConfirmation: () => false, hasPendingConfirmation: () => false, diff --git a/assistant/src/__tests__/conversation-routes-guardian-reply.test.ts b/assistant/src/__tests__/conversation-routes-guardian-reply.test.ts index b96a39c8763..1936729a72d 100644 --- a/assistant/src/__tests__/conversation-routes-guardian-reply.test.ts +++ b/assistant/src/__tests__/conversation-routes-guardian-reply.test.ts @@ -173,6 +173,7 @@ describe("handleSendMessage canonical guardian reply interception", () => { setHostBrowserProxy: () => {}, setHostFileProxy: () => {}, setHostCuProxy: () => {}, + restoreBrowserProxyAvailability: () => {}, addPreactivatedSkillId: () => {}, } as unknown as import("../daemon/conversation.js").Conversation; @@ -251,6 +252,7 @@ describe("handleSendMessage canonical guardian reply interception", () => { setHostBrowserProxy: () => {}, setHostFileProxy: () => {}, setHostCuProxy: 
() => {}, + restoreBrowserProxyAvailability: () => {}, addPreactivatedSkillId: () => {}, } as unknown as import("../daemon/conversation.js").Conversation; @@ -325,6 +327,7 @@ describe("handleSendMessage canonical guardian reply interception", () => { setHostBrowserProxy: () => {}, setHostFileProxy: () => {}, setHostCuProxy: () => {}, + restoreBrowserProxyAvailability: () => {}, addPreactivatedSkillId: () => {}, } as unknown as import("../daemon/conversation.js").Conversation; @@ -403,6 +406,7 @@ describe("handleSendMessage canonical guardian reply interception", () => { setHostBrowserProxy: () => {}, setHostFileProxy: () => {}, setHostCuProxy: () => {}, + restoreBrowserProxyAvailability: () => {}, addPreactivatedSkillId: () => {}, } as unknown as import("../daemon/conversation.js").Conversation; @@ -477,6 +481,7 @@ describe("handleSendMessage canonical guardian reply interception", () => { setHostBrowserProxy: () => {}, setHostFileProxy: () => {}, setHostCuProxy: () => {}, + restoreBrowserProxyAvailability: () => {}, addPreactivatedSkillId: () => {}, } as unknown as import("../daemon/conversation.js").Conversation; @@ -545,6 +550,7 @@ describe("handleSendMessage canonical guardian reply interception", () => { setHostBrowserProxy: () => {}, setHostFileProxy: () => {}, setHostCuProxy: () => {}, + restoreBrowserProxyAvailability: () => {}, addPreactivatedSkillId: () => {}, } as unknown as import("../daemon/conversation.js").Conversation; @@ -615,6 +621,7 @@ describe("handleSendMessage canonical guardian reply interception", () => { setHostBrowserProxy: () => {}, setHostFileProxy: () => {}, setHostCuProxy: () => {}, + restoreBrowserProxyAvailability: () => {}, addPreactivatedSkillId: () => {}, } as unknown as import("../daemon/conversation.js").Conversation; @@ -686,6 +693,7 @@ describe("handleSendMessage canonical guardian reply interception", () => { setHostBrowserProxy: () => {}, setHostFileProxy: () => {}, setHostCuProxy: () => {}, + 
restoreBrowserProxyAvailability: () => {}, addPreactivatedSkillId: () => {}, } as unknown as import("../daemon/conversation.js").Conversation; diff --git a/assistant/src/__tests__/fixtures/mock-chrome-extension.ts b/assistant/src/__tests__/fixtures/mock-chrome-extension.ts new file mode 100644 index 00000000000..d8741a2df27 --- /dev/null +++ b/assistant/src/__tests__/fixtures/mock-chrome-extension.ts @@ -0,0 +1,296 @@ +/** + * Mock Chrome extension test fixture. + * + * Opens a WebSocket to the runtime's `/v1/browser-relay` endpoint using a + * caller-supplied JWT (so the upgrade handler registers the connection + * under the guardianId encoded in the token), handles incoming + * `host_browser_request` frames by calling a mock CDP proxy, and POSTs + * the result back to `/v1/host-browser-result`. + * + * Used by e2e tests (PR 15/16) to exercise the full round-trip without + * requiring a real Chrome browser or the real extension worker. + * + * The fixture is intentionally minimal — it does not implement heartbeats, + * reconnect logic, or the legacy `ExtensionCommand` dispatch path. It only + * needs to carry host_browser_request frames end-to-end. + */ + +// ── Types ─────────────────────────────────────────────────────────── + +/** Incoming `host_browser_request` envelope (wire format). */ +export interface HostBrowserRequestFrame { + type: "host_browser_request"; + requestId: string; + conversationId: string; + cdpMethod: string; + cdpParams?: Record; + cdpSessionId?: string; + timeout_seconds?: number; +} + +/** Incoming `host_browser_cancel` envelope (wire format). */ +export interface HostBrowserCancelFrame { + type: "host_browser_cancel"; + requestId: string; +} + +/** Result body POSTed back to `/v1/host-browser-result`. */ +export interface HostBrowserResultBody { + requestId: string; + content: string; + isError: boolean; +} + +/** + * Callback that handles a CDP request and returns a + * (content, isError) pair to be POSTed back to the runtime. 
+ * + * Tests pass in a mock that simulates `chrome.debugger.sendCommand` for a + * handful of methods (e.g. `Browser.getVersion`). + */ +export type MockCdpHandler = ( + frame: HostBrowserRequestFrame, +) => Promise<{ content: string; isError: boolean }>; + +export interface MockChromeExtensionOptions { + /** Base URL of the runtime HTTP server, e.g. `http://127.0.0.1:19801`. */ + runtimeBaseUrl: string; + /** JWT bearer token for both the WebSocket handshake and the POST callback. */ + token: string; + /** + * CDP command handler. Defaults to a handler that recognises + * `Browser.getVersion` and returns a fake product string. + */ + cdpHandler?: MockCdpHandler; + /** + * Optional extra headers forwarded on the WebSocket handshake (e.g. + * `x-guardian-id` when using a service token that doesn't carry an + * actor principal id). + */ + extraHandshakeHeaders?: Record; +} + +export interface MockChromeExtension { + /** Open the WebSocket and resolve once it's connected. */ + start(): Promise; + /** Close the WebSocket and drop any in-flight request tracking. */ + stop(): Promise; + /** + * Wait until the WebSocket has transitioned to OPEN. Useful to avoid + * races between `start()` and the runtime's `register()` bookkeeping. + */ + waitForConnection(timeoutMs?: number): Promise; + /** List of every `host_browser_request` frame received, in order. */ + receivedRequests(): ReadonlyArray; + /** List of every `host_browser_cancel` frame received, in order. */ + receivedCancels(): ReadonlyArray; + /** Swap the CDP handler at runtime (tests can inject failure modes). */ + setCdpHandler(handler: MockCdpHandler): void; + /** + * Force-close the WebSocket without going through the teardown path. + * Simulates a flaky extension that drops the connection. 
+ */ + forceDisconnect(): void; +} + +// ── Defaults ──────────────────────────────────────────────────────── + +const DEFAULT_MOCK_BROWSER_VERSION = { + product: "Chrome/MockTest", + protocolVersion: "1.3", + revision: "@mock", + userAgent: "Mozilla/5.0 (mock chrome-extension e2e fixture)", + jsVersion: "0.0.0-mock", +}; + +/** + * Default CDP handler: answers `Browser.getVersion` with a fake product + * string. Unrecognised methods return an error envelope so tests can fail + * fast instead of hanging. + */ +const defaultCdpHandler: MockCdpHandler = async (frame) => { + if (frame.cdpMethod === "Browser.getVersion") { + return { + content: JSON.stringify(DEFAULT_MOCK_BROWSER_VERSION), + isError: false, + }; + } + return { + content: `mock-chrome-extension: unsupported cdpMethod "${frame.cdpMethod}"`, + isError: true, + }; +}; + +// ── Implementation ────────────────────────────────────────────────── + +/** + * Create a mock chrome-extension client bound to the given runtime base + * URL. The fixture does not start itself; callers must invoke `start()`. + */ +export function createMockChromeExtension( + options: MockChromeExtensionOptions, +): MockChromeExtension { + const baseHttp = options.runtimeBaseUrl.replace(/\/$/, ""); + const wsBase = baseHttp.replace(/^http/i, "ws"); + const wsUrl = `${wsBase}/v1/browser-relay?token=${encodeURIComponent(options.token)}`; + + let ws: WebSocket | null = null; + let connected = false; + let handler = options.cdpHandler ?? defaultCdpHandler; + const receivedRequests: HostBrowserRequestFrame[] = []; + const receivedCancels: HostBrowserCancelFrame[] = []; + const inFlight = new Map(); + + async function handleRequestFrame( + frame: HostBrowserRequestFrame, + ): Promise { + const abortCtl = new AbortController(); + inFlight.set(frame.requestId, abortCtl); + let result: { content: string; isError: boolean }; + try { + result = await handler(frame); + } catch (err) { + result = { + content: err instanceof Error ? 
err.message : String(err), + isError: true, + }; + } finally { + inFlight.delete(frame.requestId); + } + // If the request was aborted mid-flight, drop the result entirely + // (mirroring the production dispatcher, which doesn't POST a result + // for cancelled requests). + if (abortCtl.signal.aborted) return; + + const body: HostBrowserResultBody = { + requestId: frame.requestId, + content: result.content, + isError: result.isError, + }; + try { + const res = await fetch(`${baseHttp}/v1/host-browser-result`, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${options.token}`, + }, + body: JSON.stringify(body), + }); + // Consume the body so Bun doesn't leak the response handle. + await res.body?.cancel(); + } catch { + // Best-effort — if the runtime has torn down the server, the POST + // will throw. Tests assert on proxy behaviour, not POST success. + } + } + + function handleMessage(raw: string): void { + let parsed: unknown; + try { + parsed = JSON.parse(raw); + } catch { + return; + } + if (!parsed || typeof parsed !== "object") return; + const frame = parsed as Record; + if (frame.type === "host_browser_request") { + const typed = frame as unknown as HostBrowserRequestFrame; + receivedRequests.push(typed); + void handleRequestFrame(typed); + return; + } + if (frame.type === "host_browser_cancel") { + const typed = frame as unknown as HostBrowserCancelFrame; + receivedCancels.push(typed); + const abort = inFlight.get(typed.requestId); + if (abort) { + abort.abort(); + inFlight.delete(typed.requestId); + } + return; + } + // Ignore any other frames (e.g. legacy ExtensionCommand traffic). + } + + return { + async start() { + if (ws) return; + // Bun's `WebSocket` constructor accepts a second-argument options + // object with a `headers` field (a Bun-specific extension of the + // standard WebSocket API). 
We forward `extraHandshakeHeaders` + // through it so tests using service tokens can supply the + // `x-guardian-id` fallback expected by `/v1/browser-relay`. + // + // We cast through `unknown` because the DOM `WebSocket` type only + // knows about `(url, protocols)`. If this fixture is ever run in + // an environment that isn't Bun, the options object would be + // silently ignored — acceptable for a test fixture. + const wsOptions: { headers?: Record } = {}; + if (options.extraHandshakeHeaders) { + wsOptions.headers = options.extraHandshakeHeaders; + } + ws = new WebSocket(wsUrl, wsOptions as unknown as string | string[]); + ws.addEventListener("open", () => { + connected = true; + }); + ws.addEventListener("message", (ev: MessageEvent) => { + const data = ev.data; + if (typeof data === "string") { + handleMessage(data); + } else if (data instanceof ArrayBuffer) { + handleMessage(new TextDecoder().decode(data)); + } + }); + ws.addEventListener("close", () => { + connected = false; + }); + }, + async stop() { + const sock = ws; + ws = null; + if (sock) { + try { + sock.close(1000, "fixture shutdown"); + } catch { + // best-effort + } + } + for (const abort of inFlight.values()) { + abort.abort(); + } + inFlight.clear(); + }, + async waitForConnection(timeoutMs = 2000) { + const deadline = Date.now() + timeoutMs; + while (!connected) { + if (Date.now() > deadline) { + throw new Error( + `mock-chrome-extension: timed out waiting for WebSocket OPEN after ${timeoutMs}ms`, + ); + } + await new Promise((r) => setTimeout(r, 10)); + } + }, + receivedRequests() { + return receivedRequests; + }, + receivedCancels() { + return receivedCancels; + }, + setCdpHandler(next) { + handler = next; + }, + forceDisconnect() { + const sock = ws; + ws = null; + connected = false; + if (sock) { + try { + sock.close(4000, "forced disconnect"); + } catch { + // best-effort + } + } + }, + }; +} diff --git a/assistant/src/__tests__/gateway-only-guard.test.ts 
b/assistant/src/__tests__/gateway-only-guard.test.ts index 37d741c0c6b..b3cf1834d10 100644 --- a/assistant/src/__tests__/gateway-only-guard.test.ts +++ b/assistant/src/__tests__/gateway-only-guard.test.ts @@ -35,6 +35,8 @@ const ALLOWLIST = new Set([ // --- Chrome extension (local relay communication, not gateway API consumption) --- "clients/chrome-extension/background/worker.ts", "clients/chrome-extension/popup/popup.ts", + // --- Chrome extension native messaging helper (local daemon pair endpoint, by design) --- + "clients/chrome-extension-native-host/src/index.ts", // --- Documentation and comments that mention the port for explanatory purposes --- "AGENTS.md", // documents the gateway-only rule itself diff --git a/assistant/src/__tests__/host-bash-proxy.test.ts b/assistant/src/__tests__/host-bash-proxy.test.ts index 1a0ac9009ae..d38b897eec8 100644 --- a/assistant/src/__tests__/host-bash-proxy.test.ts +++ b/assistant/src/__tests__/host-bash-proxy.test.ts @@ -1,4 +1,4 @@ -import { afterEach, describe, expect, mock, test } from "bun:test"; +import { afterEach, describe, expect, jest, mock, test } from "bun:test"; const mockConfig = { timeouts: { @@ -393,6 +393,155 @@ describe("HostBashProxy", () => { }); }); + describe("abort listener lifecycle", () => { + // Helper that wraps an AbortSignal to observe add/removeEventListener + // invocations without tripping over tsc's strict overload matching on + // AbortSignal itself. 
+ type Spied = { + signal: AbortSignal; + addCalls: string[]; + removeCalls: string[]; + }; + function spySignal(source: AbortSignal): Spied { + const addCalls: string[] = []; + const removeCalls: string[] = []; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const s = source as any; + const origAdd = source.addEventListener.bind(source); + const origRemove = source.removeEventListener.bind(source); + s.addEventListener = ( + type: string, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + ...rest: any[] + ) => { + addCalls.push(type); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return (origAdd as any)(type, ...rest); + }; + s.removeEventListener = ( + type: string, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + ...rest: any[] + ) => { + removeCalls.push(type); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return (origRemove as any)(type, ...rest); + }; + return { signal: source, addCalls, removeCalls }; + } + + test("removes abort listener from signal after resolve completes", async () => { + setup(); + const controller = new AbortController(); + const spy = spySignal(controller.signal); + + const resultPromise = proxy.request( + { command: "echo hello" }, + "session-1", + spy.signal, + ); + + expect(spy.addCalls).toEqual(["abort"]); + expect(spy.removeCalls).toEqual([]); + + const requestId = (sentMessages[0] as Record) + .requestId as string; + proxy.resolve(requestId, { + stdout: "hello\n", + stderr: "", + exitCode: 0, + timedOut: false, + }); + await resultPromise; + + // Listener is detached after normal completion. + expect(spy.removeCalls).toEqual(["abort"]); + + // Subsequent aborts are harmless no-ops (no side effects on the proxy). + controller.abort(); + // No additional emitted envelopes from the late abort. 
+ expect(sentMessages).toHaveLength(1); + }); + + test("removes abort listener from signal on timer timeout", async () => { + setup(); + + jest.useFakeTimers(); + try { + const controller = new AbortController(); + const spy = spySignal(controller.signal); + + const resultPromise = proxy.request( + { command: "echo slow", timeout_seconds: 30 }, + "session-1", + spy.signal, + ); + + expect(spy.addCalls).toEqual(["abort"]); + expect(spy.removeCalls).toEqual([]); + + // Proxy timeout is timeout_seconds + 3 = 33s. Advance past it. + jest.advanceTimersByTime(34 * 1000); + + const result = await resultPromise; + expect(result.isError).toBe(true); + expect(result.content).toContain("Host bash proxy timed out"); + + // Listener is detached after the timer fires. + expect(spy.removeCalls).toEqual(["abort"]); + + // Subsequent aborts should be harmless — no cancel emitted. + controller.abort(); + expect(sentMessages).toHaveLength(1); + } finally { + jest.useRealTimers(); + } + }); + }); + + describe("sender throws synchronously", () => { + test("rejects the promise, clears pending state and timer, invokes onInternalResolve", async () => { + const resolvedIds: string[] = []; + sentMessages = []; + sendToClient = () => { + throw new Error("transport down"); + }; + proxy = new HostBashProxy(sendToClient, (id) => resolvedIds.push(id)); + + // request() synchronously calls sendToClient inside the Promise + // executor. A throw there surfaces as a rejected promise. + const resultPromise = proxy.request( + { command: "echo hello" }, + "session-1", + ); + + await expect(resultPromise).rejects.toThrow("transport down"); + + // The internal resolve should fire exactly once as part of cleanup. + expect(resolvedIds).toHaveLength(1); + + // Issue a new request on a fresh (non-throwing) sender and verify + // the proxy is still functional — no stale timers or bookkeeping + // from the failed request. 
+ sentMessages = []; + proxy.updateSender((msg) => sentMessages.push(msg), true); + const okPromise = proxy.request({ command: "echo ok" }, "session-1"); + expect(sentMessages).toHaveLength(1); + const okRequestId = (sentMessages[0] as Record) + .requestId as string; + expect(proxy.hasPendingRequest(okRequestId)).toBe(true); + proxy.resolve(okRequestId, { + stdout: "ok\n", + stderr: "", + exitCode: 0, + timedOut: false, + }); + const okResult = await okPromise; + expect(okResult.content).toContain("ok"); + expect(okResult.isError).toBe(false); + }); + }); + describe("onInternalResolve callback", () => { test("fires on abort", async () => { const resolvedIds: string[] = []; diff --git a/assistant/src/__tests__/host-browser-e2e-cloud.test.ts b/assistant/src/__tests__/host-browser-e2e-cloud.test.ts new file mode 100644 index 00000000000..634341a294f --- /dev/null +++ b/assistant/src/__tests__/host-browser-e2e-cloud.test.ts @@ -0,0 +1,334 @@ +/** + * E2E smoke test for the cloud-hosted `host_browser_request` round-trip. + * + * Boots the runtime HTTP server in-process, opens a mock chrome-extension + * WebSocket against `/v1/browser-relay`, and drives + * `HostBrowserProxy.request()` end-to-end: + * + * proxy.request() + * → sendToClient (routed via ChromeExtensionRegistry by guardianId) + * → mock extension WebSocket receives host_browser_request + * → mock CDP handler (Browser.getVersion fake) + * → POST /v1/host-browser-result + * → handleHostBrowserResult → conversation.resolveHostBrowser + * → proxy.resolve() → request() resolves + * + * Covers: + * - Happy path: Browser.getVersion round-trips and returns the fake + * product string. + * - Abort: an aborted AbortSignal resolves with "Aborted" and the mock + * extension receives a host_browser_cancel frame. + * - Timeout: if the mock extension receives the frame but never + * POSTs a result, the proxy's setTimeout path fires and surfaces + * a "timed out waiting for client response" error. 
+ * + * The test runs entirely in Bun + loopback WebSocket/fetch — no real + * Chrome required. + */ +import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; + +// ── Module mocks (must be declared before the real imports below) ──── + +mock.module("../util/logger.js", () => ({ + getLogger: () => + new Proxy({} as Record, { + get: () => () => {}, + }), +})); + +mock.module("../config/loader.js", () => ({ + getConfig: () => ({ + ui: {}, + model: "test", + provider: "test", + memory: { enabled: false }, + rateLimit: { maxRequestsPerMinute: 0 }, + secretDetection: { enabled: false }, + contextWindow: { maxInputTokens: 200000 }, + services: { + inference: { + mode: "your-own", + provider: "anthropic", + model: "claude-opus-4-6", + }, + "image-generation": { + mode: "your-own", + provider: "gemini", + model: "gemini-3.1-flash-image-preview", + }, + "web-search": { mode: "your-own", provider: "inference-provider-native" }, + }, + }), +})); + +// ── Real imports (after mocks) ────────────────────────────────────── + +import type { Conversation } from "../daemon/conversation.js"; +import { HostBrowserProxy } from "../daemon/host-browser-proxy.js"; +import type { ServerMessage } from "../daemon/message-protocol.js"; +import { getDb, initializeDb } from "../memory/db.js"; +import { mintToken } from "../runtime/auth/token-service.js"; +import { + __resetChromeExtensionRegistryForTests, + getChromeExtensionRegistry, +} from "../runtime/chrome-extension-registry.js"; +import { RuntimeHttpServer } from "../runtime/http-server.js"; +import * as pendingInteractions from "../runtime/pending-interactions.js"; + +initializeDb(); + +// ── Helpers ───────────────────────────────────────────────────────── + +/** + * Wrap a HostBrowserProxy in a sendToClient that: + * 1. Routes host_browser_request/host_browser_cancel via the Chrome + * extension registry for the given guardianId. + * 2. 
Registers a pending interaction for each request so the + * `/v1/host-browser-result` HTTP route can find the stub + * conversation and call `resolveHostBrowser` on it. + * + * Returns the proxy and its stub conversation. In production this + * wiring lives in `conversation-routes.ts` `makeHubPublisher`; the test + * reproduces the minimum surface needed for the round-trip. + */ +function createBoundProxy( + guardianId: string, + conversationId: string, +): { proxy: HostBrowserProxy; conversation: Conversation } { + // The stub Conversation's `resolveHostBrowser` routes straight back + // to the real proxy. Declare the proxy reference first so the stub + // can close over it before the proxy itself is constructed below. + let proxyRef: HostBrowserProxy | null = null; + const conversation = { + resolveHostBrowser( + requestId: string, + response: { content: string; isError: boolean }, + ) { + proxyRef?.resolve(requestId, response); + }, + } as unknown as Conversation; + + const sendToClient = (msg: ServerMessage) => { + // Register pending interactions for host_browser_request envelopes + // so the /v1/host-browser-result route can look them up. + if ((msg as { type: string }).type === "host_browser_request") { + const requestId = (msg as { requestId: string }).requestId; + pendingInteractions.register(requestId, { + conversation, + conversationId, + kind: "host_browser", + }); + } + const ok = getChromeExtensionRegistry().send(guardianId, msg); + if (!ok) { + throw new Error( + `chrome-extension host_browser send failed: no active connection for guardian ${guardianId}`, + ); + } + }; + + const proxy = new HostBrowserProxy(sendToClient); + proxyRef = proxy; + return { proxy, conversation }; +} + +/** + * Mint an actor-bound JWT for the given guardianId. The WebSocket + * upgrade handler parses `sub=actor::` + * and treats `actorPrincipalId` as the guardianId. 
+ */ +function mintActorToken(guardianId: string): string { + return mintToken({ + aud: "vellum-daemon", + sub: `actor:self:${guardianId}`, + scope_profile: "actor_client_v1", + policy_epoch: 1, + ttlSeconds: 3600, + }); +} + +// ── Tests ─────────────────────────────────────────────────────────── + +describe("host_browser cloud-hosted e2e round-trip", () => { + let server: RuntimeHttpServer; + let port: number; + let runtimeBaseUrl: string; + + beforeEach(async () => { + // Each test gets a clean DB and a fresh registry so connection + // state doesn't leak between cases. + const db = getDb(); + db.run("DELETE FROM contact_channels"); + db.run("DELETE FROM contacts"); + pendingInteractions.clear(); + __resetChromeExtensionRegistryForTests(); + + port = 19800 + Math.floor(Math.random() * 200); + runtimeBaseUrl = `http://127.0.0.1:${port}`; + server = new RuntimeHttpServer({ port }); + await server.start(); + }); + + afterEach(async () => { + await server?.stop(); + pendingInteractions.clear(); + __resetChromeExtensionRegistryForTests(); + }); + + test("happy path: Browser.getVersion round-trips through the mock extension", async () => { + const guardianId = `test-guardian-${crypto.randomUUID()}`; + const token = mintActorToken(guardianId); + + // Dynamic import keeps the module cache warm across tests but avoids + // binding the fixture at file-load time (where the mocks might not + // yet have applied for a freshly forked test worker). + const { createMockChromeExtension } = + await import("./fixtures/mock-chrome-extension.js"); + const mockExt = createMockChromeExtension({ + runtimeBaseUrl, + token, + }); + await mockExt.start(); + await mockExt.waitForConnection(); + + // Give the open handler a tick to register the connection in the + // ChromeExtensionRegistry (Bun's WebSocket open callback fires + // asynchronously after the upgrade handler returns). 
+ await waitForRegistryEntry(guardianId); + + const { proxy } = createBoundProxy(guardianId, "conv-happy"); + + const result = await proxy.request( + { cdpMethod: "Browser.getVersion" }, + "conv-happy", + ); + + expect(result.isError).toBe(false); + expect(result.content).toContain("Chrome/MockTest"); + + const received = mockExt.receivedRequests(); + expect(received).toHaveLength(1); + expect(received[0].cdpMethod).toBe("Browser.getVersion"); + expect(typeof received[0].requestId).toBe("string"); + expect(received[0].conversationId).toBe("conv-happy"); + + proxy.dispose(); + await mockExt.stop(); + }); + + test("abort: AbortSignal resolves to 'Aborted' and extension receives host_browser_cancel", async () => { + const guardianId = `test-guardian-${crypto.randomUUID()}`; + const token = mintActorToken(guardianId); + + const { createMockChromeExtension } = + await import("./fixtures/mock-chrome-extension.js"); + const mockExt = createMockChromeExtension({ + runtimeBaseUrl, + token, + // Hang forever so we can abort mid-flight without a race against + // the default handler's immediate response. + cdpHandler: () => new Promise(() => {}), + }); + await mockExt.start(); + await mockExt.waitForConnection(); + await waitForRegistryEntry(guardianId); + + const { proxy } = createBoundProxy(guardianId, "conv-abort"); + + const controller = new AbortController(); + const resultPromise = proxy.request( + { cdpMethod: "Browser.getVersion" }, + "conv-abort", + controller.signal, + ); + + // Wait for the mock extension to observe the request, then abort so + // the cancel envelope has somewhere to land. 
+ await waitFor(() => mockExt.receivedRequests().length === 1); + + controller.abort(); + const result = await resultPromise; + + expect(result.content).toBe("Aborted"); + expect(result.isError).toBe(true); + + // The cancel frame is dispatched synchronously from the abort + // listener, but the WebSocket delivers it asynchronously — give it a + // few turns to arrive before asserting. + await waitFor(() => mockExt.receivedCancels().length === 1); + const cancels = mockExt.receivedCancels(); + expect(cancels).toHaveLength(1); + expect(cancels[0].requestId).toBe(mockExt.receivedRequests()[0].requestId); + + proxy.dispose(); + await mockExt.stop(); + }); + + test("timeout: proxy.request resolves with timeout error when client never responds", async () => { + const guardianId = `test-guardian-${crypto.randomUUID()}`; + const token = mintActorToken(guardianId); + + const { createMockChromeExtension } = + await import("./fixtures/mock-chrome-extension.js"); + // CDP handler that never resolves — the request frame reaches the + // mock extension successfully, but no result is ever POSTed back. + // This exercises the proxy's `setTimeout` path (as opposed to a + // synchronous send failure, which is a separate code path). + const mockExt = createMockChromeExtension({ + runtimeBaseUrl, + token, + cdpHandler: () => new Promise(() => {}), + }); + await mockExt.start(); + await mockExt.waitForConnection(); + await waitForRegistryEntry(guardianId); + + const { proxy } = createBoundProxy(guardianId, "conv-timeout"); + + // 50ms timeout — short enough to keep the test fast, long enough + // for the request frame to make the WS round-trip to the mock + // extension before the timer fires. 
+ const result = await proxy.request( + { cdpMethod: "Browser.getVersion", timeout_seconds: 0.05 }, + "conv-timeout", + ); + + expect(result.isError).toBe(true); + expect(result.content).toContain("timed out"); + + // Sanity check: the frame actually reached the mock extension (so + // we know we're exercising the proxy's timer, not a send failure). + expect(mockExt.receivedRequests()).toHaveLength(1); + expect(mockExt.receivedRequests()[0].cdpMethod).toBe("Browser.getVersion"); + + proxy.dispose(); + await mockExt.stop(); + }); +}); + +// ── Local wait helpers ────────────────────────────────────────────── + +async function waitFor( + predicate: () => boolean, + timeoutMs = 2000, +): Promise { + const deadline = Date.now() + timeoutMs; + while (!predicate()) { + if (Date.now() > deadline) { + throw new Error( + `waitFor: predicate did not become true within ${timeoutMs}ms`, + ); + } + await new Promise((r) => setTimeout(r, 10)); + } +} + +async function waitForRegistryEntry( + guardianId: string, + timeoutMs = 2000, +): Promise { + await waitFor( + () => getChromeExtensionRegistry().get(guardianId) !== undefined, + timeoutMs, + ); +} diff --git a/assistant/src/__tests__/host-browser-e2e-self-hosted.test.ts b/assistant/src/__tests__/host-browser-e2e-self-hosted.test.ts new file mode 100644 index 00000000000..db5d785405d --- /dev/null +++ b/assistant/src/__tests__/host-browser-e2e-self-hosted.test.ts @@ -0,0 +1,371 @@ +/** + * End-to-end smoke test for the self-hosted native-messaging capability + * bootstrap path. + * + * This test exercises the full flow at the subprocess boundary: + * + * 1. A minimal Bun HTTP server mounts the real + * `handleBrowserExtensionPair` route from the assistant runtime. + * 2. The compiled native helper binary + * (`clients/chrome-extension-native-host/dist/index.js`) is spawned as + * a child process and pointed at that server via the `--assistant-port` + * CLI flag. + * 3. 
The test writes a Chrome-native-messaging-framed + * `{ type: "request_token" }` to the helper's stdin. + * 4. The helper POSTs `/v1/browser-extension-pair` on the test server, + * gets back a capability token + guardianId, and echoes them to stdout + * as a `token_response` frame. + * 5. The test asserts the returned token verifies via + * `verifyHostBrowserCapability` — i.e. a fresh install can pair the + * Chrome extension via the native helper end-to-end without any + * shortcuts. + * + * The test **skips gracefully** if the native helper hasn't been built + * (`clients/chrome-extension-native-host/dist/index.js` missing). Run + * `bun run build` in that package first to enable the full path. + */ + +import { type ChildProcessWithoutNullStreams, spawn } from "node:child_process"; +import { randomBytes } from "node:crypto"; +import { + chmodSync, + existsSync, + mkdtempSync, + readFileSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join, resolve } from "node:path"; +import { afterAll, beforeAll, describe, expect, test } from "bun:test"; + +import { + mintHostBrowserCapability, + resetCapabilityTokenSecretForTests, + setCapabilityTokenSecretForTests, + verifyHostBrowserCapability, +} from "../runtime/capability-tokens.js"; +import { handleBrowserExtensionPair } from "../runtime/routes/browser-extension-pair-routes.js"; + +// --------------------------------------------------------------------------- +// Native helper binary discovery + skip guard +// --------------------------------------------------------------------------- + +/** + * Resolve the path to the compiled native helper. The helper lives in a + * sibling package under `clients/chrome-extension-native-host/`, so we + * walk up from `assistant/src/__tests__/` to the repo root and then back + * down into the native-host package. + */ +function resolveHelperBinary(): string { + // `import.meta.dir` gives us `.../assistant/src/__tests__`. 
The repo + // root is three levels up. Past that, the native host lives at + // `clients/chrome-extension-native-host/dist/index.js`. + return resolve( + import.meta.dir, + "..", + "..", + "..", + "clients", + "chrome-extension-native-host", + "dist", + "index.js", + ); +} + +const HELPER_BINARY = resolveHelperBinary(); +const HELPER_EXISTS = existsSync(HELPER_BINARY); + +const ALLOWED_ORIGIN = "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/"; + +const SKIP_REASON = + "clients/chrome-extension-native-host/dist/index.js is missing — run `bun run build` in that package to enable the E2E smoke test."; + +// --------------------------------------------------------------------------- +// Chrome native messaging framing (4-byte LE length prefix + UTF-8 JSON) +// --------------------------------------------------------------------------- + +/** + * These helpers are duplicated from the native-host package's + * `protocol.ts` so this test is self-contained and does not reach across + * package boundaries at import time. The framing is fixed by the Chrome + * native messaging protocol spec, so there is no risk of drift. 
+ */ +function encodeFrame(payload: unknown): Buffer { + const json = Buffer.from(JSON.stringify(payload), "utf8"); + const len = Buffer.alloc(4); + len.writeUInt32LE(json.length, 0); + return Buffer.concat([len, json]); +} + +function decodeFrames(buf: Buffer): { + frames: unknown[]; + remainder: Buffer; +} { + const frames: unknown[] = []; + let offset = 0; + while (buf.length - offset >= 4) { + const len = buf.readUInt32LE(offset); + if (buf.length - offset - 4 < len) break; + const body = buf.subarray(offset + 4, offset + 4 + len); + frames.push(JSON.parse(body.toString("utf8"))); + offset += 4 + len; + } + return { frames, remainder: buf.subarray(offset) }; +} + +// --------------------------------------------------------------------------- +// Minimal pair-endpoint HTTP server using the real route handler +// --------------------------------------------------------------------------- + +interface PairServer { + server: ReturnType; + port: number; + stop: () => void; +} + +/** + * Boots a minimal Bun.serve that mounts the real + * `handleBrowserExtensionPair` route. This is intentionally a narrower + * surface than `RuntimeHttpServer` — we want to exercise the exact same + * route handler the daemon uses in production, but without pulling in the + * full runtime's dependency graph (which would drag in the workspace DB, + * conversation manager, etc. and make the test flaky + slow). 
+ */ +function startPairServer(): PairServer { + const server = Bun.serve({ + port: 0, + hostname: "127.0.0.1", + async fetch(req, srv) { + const url = new URL(req.url); + if (url.pathname === "/v1/browser-extension-pair") { + return handleBrowserExtensionPair(req, { + requestIP: (_req) => srv.requestIP(_req), + }); + } + return new Response("not found", { status: 404 }); + }, + }); + return { + server, + port: server.port as number, + stop: () => server.stop(true), + }; +} + +// --------------------------------------------------------------------------- +// Subprocess helper +// --------------------------------------------------------------------------- + +interface HelperRunResult { + frames: unknown[]; + stderr: string; + exitCode: number | null; +} + +function runHelper(options: { + extensionOrigin: string; + assistantPort: number; + stdinBytes: Buffer; + timeoutMs?: number; +}): Promise { + const args: string[] = [ + HELPER_BINARY, + options.extensionOrigin, + "--assistant-port", + String(options.assistantPort), + ]; + + const child: ChildProcessWithoutNullStreams = spawn("node", args, { + stdio: ["pipe", "pipe", "pipe"], + env: { ...process.env }, + }); + + const stdoutChunks: Buffer[] = []; + const stderrChunks: Buffer[] = []; + child.stdout.on("data", (chunk: Buffer) => stdoutChunks.push(chunk)); + child.stderr.on("data", (chunk: Buffer) => stderrChunks.push(chunk)); + + child.stdin.write(options.stdinBytes); + child.stdin.end(); + + return new Promise((resolvePromise, rejectPromise) => { + const timeout = setTimeout(() => { + child.kill("SIGKILL"); + rejectPromise( + new Error( + `helper binary timed out after ${options.timeoutMs ?? 5000}ms`, + ), + ); + }, options.timeoutMs ?? 
5000); + + child.on("error", (err) => { + clearTimeout(timeout); + rejectPromise(err); + }); + child.on("close", (code) => { + clearTimeout(timeout); + const stdout = Buffer.concat(stdoutChunks); + const stderr = Buffer.concat(stderrChunks).toString("utf8"); + try { + const { frames } = decodeFrames(stdout); + resolvePromise({ frames, stderr, exitCode: code }); + } catch (err) { + rejectPromise(err as Error); + } + }); + }); +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("host-browser E2E — self-hosted native messaging path", () => { + let pairServer: PairServer | null = null; + + beforeAll(() => { + // Pin the capability-token secret to a deterministic test value so + // the token the route mints can round-trip through + // `verifyHostBrowserCapability` in this process. Both sides of the + // flow share the same in-process module, so setting the secret once + // is enough for both mint + verify to agree. + resetCapabilityTokenSecretForTests(); + setCapabilityTokenSecretForTests(randomBytes(32)); + + if (!HELPER_EXISTS) return; + pairServer = startPairServer(); + }); + + afterAll(() => { + if (pairServer) pairServer.stop(); + resetCapabilityTokenSecretForTests(); + }); + + if (!HELPER_EXISTS) { + // Native helper hasn't been built; emit a warning so the gap is + // visible in test output without registering a placeholder test. + console.warn(`[host-browser-e2e] ${SKIP_REASON}`); + } else { + test("pair flow: request_token -> token_response -> verifiable capability token", async () => { + // Narrow for TS — pairServer is always set in this branch thanks to + // the beforeAll guard that mirrors HELPER_EXISTS. 
+ const srv = pairServer!; + + const result = await runHelper({ + extensionOrigin: ALLOWED_ORIGIN, + assistantPort: srv.port, + stdinBytes: encodeFrame({ type: "request_token" }), + }); + + // Helper should have exited cleanly after writing one token_response + // frame. Pipe any stderr into the assertion message to make + // debugging failures easier. + expect(result.exitCode, `helper stderr: ${result.stderr}`).toBe(0); + expect(result.frames).toHaveLength(1); + + const frame = result.frames[0] as { + type: string; + token?: string; + expiresAt?: string; + guardianId?: string; + }; + expect(frame.type).toBe("token_response"); + expect(typeof frame.token).toBe("string"); + expect(frame.token!.length).toBeGreaterThan(0); + expect(typeof frame.expiresAt).toBe("string"); + + // Gap 3 regression guard: the helper must surface the + // guardianId returned by /v1/browser-extension-pair on the + // native-messaging frame so the chrome extension's + // bootstrapLocalToken() can persist it. The route's + // resolveLocalGuardianId() falls back to the literal string + // "local" when no vellum guardian is bootstrapped, which is + // the case in this test environment, so we assert against the + // exact value as well as a non-empty type guard. + expect(typeof frame.guardianId).toBe("string"); + expect(frame.guardianId!.length).toBeGreaterThan(0); + expect(frame.guardianId).toBe("local"); + + // The returned token must verify via the in-process capability + // verifier — this is the core invariant the native-messaging + // bootstrap promises. The daemon is the only party that could + // have signed this, so a successful verification proves the + // end-to-end pair flow worked. 
+ const claims = verifyHostBrowserCapability(frame.token!); + expect(claims).not.toBeNull(); + expect(claims?.capability).toBe("host_browser_command"); + expect(typeof claims?.guardianId).toBe("string"); + expect(claims?.guardianId.length).toBeGreaterThan(0); + // The frame's guardianId should match the claim's guardianId — + // both originate from the same `resolveLocalGuardianId()` call + // inside the route handler. + expect(frame.guardianId).toBe(claims?.guardianId); + // expiresAt in the response frame should agree with the numeric + // claim expiry to within ISO-string precision. + const iso = new Date(claims!.expiresAt).toISOString(); + expect(frame.expiresAt).toBe(iso); + }); + + // Phase 3 will extend `/v1/browser-relay` to accept capability tokens + // minted by `mintHostBrowserCapability`. Once the upgrade handler honors + // those tokens, this test should round-trip Browser.getVersion through + // the relay and assert the result frame. + test.todo( + "Phase 3: WebSocket round-trip via /v1/browser-relay?token=", + () => {}, + ); + } + + // ------------------------------------------------------------------------- + // Dev-only `~/.vellum/daemon-token` fallback + // ------------------------------------------------------------------------- + + describe("dev daemon-token fallback path", () => { + let tmpDir: string; + + beforeAll(() => { + tmpDir = mkdtempSync(join(tmpdir(), "vellum-daemon-token-test-")); + }); + + afterAll(() => { + rmSync(tmpDir, { recursive: true, force: true }); + }); + + test("a token written to a local file round-trips through verifyHostBrowserCapability", () => { + // Emulate the `writeDaemonTokenFallback` lifecycle: mint a fresh + // capability token, persist it to a 0600 file (the production + // helper writes to `~/.vellum/daemon-token`, but we use a tempdir + // so the test doesn't clobber real dev state), then read it back + // and verify. 
+ // + // This path is what the Mac app's manual "paste daemon token" + // pairing UI ends up exercising — the file on disk is the only + // transport. If the bytes on disk don't round-trip through + // `verifyHostBrowserCapability`, manual pairing is broken. + resetCapabilityTokenSecretForTests(); + setCapabilityTokenSecretForTests(randomBytes(32)); + + const { token, expiresAt } = mintHostBrowserCapability("local"); + expect(expiresAt).toBeGreaterThan(Date.now()); + + const tokenPath = join(tmpDir, "daemon-token"); + writeFileSync(tokenPath, token, { mode: 0o600 }); + // Explicitly chmod in case the umask clobbered the mode arg to + // writeFileSync (best-effort — some filesystems ignore this). + try { + chmodSync(tokenPath, 0o600); + } catch { + /* ignore */ + } + + const readBack = readFileSync(tokenPath, "utf8"); + expect(readBack).toBe(token); + + const claims = verifyHostBrowserCapability(readBack); + expect(claims).not.toBeNull(); + expect(claims?.capability).toBe("host_browser_command"); + expect(claims?.guardianId).toBe("local"); + }); + }); +}); diff --git a/assistant/src/__tests__/host-cu-proxy.test.ts b/assistant/src/__tests__/host-cu-proxy.test.ts index 777ada9e4d0..7b0cf0d8b64 100644 --- a/assistant/src/__tests__/host-cu-proxy.test.ts +++ b/assistant/src/__tests__/host-cu-proxy.test.ts @@ -1,4 +1,4 @@ -import { afterEach, describe, expect, test } from "bun:test"; +import { afterEach, describe, expect, jest, test } from "bun:test"; import { HostCuProxy } from "../daemon/host-cu-proxy.js"; @@ -776,6 +776,176 @@ describe("HostCuProxy", () => { }); }); + // ------------------------------------------------------------------------- + // abort listener lifecycle + // ------------------------------------------------------------------------- + + describe("abort listener lifecycle", () => { + // Helper that wraps an AbortSignal to observe add/removeEventListener + // invocations without tripping over tsc's strict overload matching on + // AbortSignal itself. 
+ type Spied = { + signal: AbortSignal; + addCalls: string[]; + removeCalls: string[]; + }; + function spySignal(source: AbortSignal): Spied { + const addCalls: string[] = []; + const removeCalls: string[] = []; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const s = source as any; + const origAdd = source.addEventListener.bind(source); + const origRemove = source.removeEventListener.bind(source); + s.addEventListener = ( + type: string, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + ...rest: any[] + ) => { + addCalls.push(type); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return (origAdd as any)(type, ...rest); + }; + s.removeEventListener = ( + type: string, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + ...rest: any[] + ) => { + removeCalls.push(type); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return (origRemove as any)(type, ...rest); + }; + return { signal: source, addCalls, removeCalls }; + } + + test("removes abort listener from signal after resolve completes", async () => { + setup(); + const controller = new AbortController(); + const spy = spySignal(controller.signal); + + const resultPromise = proxy.request( + "computer_use_click", + { element_id: 1 }, + "session-1", + 1, + undefined, + spy.signal, + ); + + expect(spy.addCalls).toEqual(["abort"]); + expect(spy.removeCalls).toEqual([]); + + const requestId = (sentMessages[0] as Record) + .requestId as string; + proxy.resolve(requestId, { axTree: "Button [1]" }); + await resultPromise; + + // Listener is detached after normal completion. + expect(spy.removeCalls).toEqual(["abort"]); + + // Subsequent aborts are harmless no-ops (no side effects on the proxy). + controller.abort(); + // No additional emitted envelopes from the late abort. 
+ expect(sentMessages).toHaveLength(1); + }); + + test("removes abort listener from signal on timer timeout", async () => { + setup(); + + jest.useFakeTimers(); + try { + const controller = new AbortController(); + const spy = spySignal(controller.signal); + + const resultPromise = proxy.request( + "computer_use_click", + { element_id: 1 }, + "session-1", + 1, + undefined, + spy.signal, + ); + + expect(spy.addCalls).toEqual(["abort"]); + expect(spy.removeCalls).toEqual([]); + + const requestId = (sentMessages[0] as Record) + .requestId as string; + expect(proxy.hasPendingRequest(requestId)).toBe(true); + + // Advance past the 60s internal timeout. + jest.advanceTimersByTime(61 * 1000); + + const result = await resultPromise; + expect(result.isError).toBe(true); + expect(result.content).toContain("Host CU proxy timed out"); + expect(proxy.hasPendingRequest(requestId)).toBe(false); + + // Listener is detached after the timer fires. + expect(spy.removeCalls).toEqual(["abort"]); + + // Subsequent aborts should be harmless — no cancel emitted. + controller.abort(); + expect(sentMessages).toHaveLength(1); + } finally { + jest.useRealTimers(); + } + }); + }); + + // ------------------------------------------------------------------------- + // sender throws synchronously + // ------------------------------------------------------------------------- + + describe("sender throws synchronously", () => { + test("rejects the promise, clears pending state and timer, invokes onInternalResolve", async () => { + sentMessages = []; + resolvedRequestIds = []; + const throwingSend = () => { + throw new Error("transport down"); + }; + proxy = new HostCuProxy(throwingSend as never, (requestId: string) => + resolvedRequestIds.push(requestId), + ); + + // request() synchronously calls sendToClient inside the Promise + // executor. A throw there surfaces as a rejected promise. 
+ const resultPromise = proxy.request( + "computer_use_click", + { element_id: 1 }, + "session-1", + 1, + ); + + await expect(resultPromise).rejects.toThrow("transport down"); + + // The internal resolve should fire exactly once as part of cleanup. + expect(resolvedRequestIds).toHaveLength(1); + + // Issue a new request on a fresh (non-throwing) sender and verify + // the proxy is still functional — no stale timers or bookkeeping + // from the failed request. + sentMessages = []; + proxy.updateSender( + ((msg: unknown) => sentMessages.push(msg)) as never, + true, + ); + const okPromise = proxy.request( + "computer_use_click", + { element_id: 2 }, + "session-1", + 2, + ); + expect(sentMessages).toHaveLength(1); + const okRequestId = (sentMessages[0] as Record) + .requestId as string; + expect(proxy.hasPendingRequest(okRequestId)).toBe(true); + proxy.resolve(okRequestId, { axTree: "Button [2]" }); + const okResult = await okPromise; + expect(okResult.isError).toBe(false); + expect(okResult.content).toContain("Button [2]"); + }); + }); + // ------------------------------------------------------------------------- // onInternalResolve callback // ------------------------------------------------------------------------- diff --git a/assistant/src/__tests__/host-file-proxy.test.ts b/assistant/src/__tests__/host-file-proxy.test.ts index b08626e9748..4fe9bc3d4fd 100644 --- a/assistant/src/__tests__/host-file-proxy.test.ts +++ b/assistant/src/__tests__/host-file-proxy.test.ts @@ -1,4 +1,4 @@ -import { afterEach, describe, expect, test } from "bun:test"; +import { afterEach, describe, expect, jest, test } from "bun:test"; const { HostFileProxy } = await import("../daemon/host-file-proxy.js"); @@ -377,6 +377,151 @@ describe("HostFileProxy", () => { }); }); + describe("abort listener lifecycle", () => { + // Helper that wraps an AbortSignal to observe add/removeEventListener + // invocations without tripping over tsc's strict overload matching on + // AbortSignal itself. 
+ type Spied = { + signal: AbortSignal; + addCalls: string[]; + removeCalls: string[]; + }; + function spySignal(source: AbortSignal): Spied { + const addCalls: string[] = []; + const removeCalls: string[] = []; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const s = source as any; + const origAdd = source.addEventListener.bind(source); + const origRemove = source.removeEventListener.bind(source); + s.addEventListener = ( + type: string, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + ...rest: any[] + ) => { + addCalls.push(type); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return (origAdd as any)(type, ...rest); + }; + s.removeEventListener = ( + type: string, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + ...rest: any[] + ) => { + removeCalls.push(type); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return (origRemove as any)(type, ...rest); + }; + return { signal: source, addCalls, removeCalls }; + } + + test("removes abort listener from signal after resolve completes", async () => { + setup(); + const controller = new AbortController(); + const spy = spySignal(controller.signal); + + const resultPromise = proxy.request( + { operation: "read", path: "/tmp/test.txt" }, + "session-1", + spy.signal, + ); + + expect(spy.addCalls).toEqual(["abort"]); + expect(spy.removeCalls).toEqual([]); + + const requestId = (sentMessages[0] as Record) + .requestId as string; + proxy.resolve(requestId, { content: "file contents", isError: false }); + await resultPromise; + + // Listener is detached after normal completion. + expect(spy.removeCalls).toEqual(["abort"]); + + // Subsequent aborts are harmless no-ops (no side effects on the proxy). + controller.abort(); + // No additional emitted envelopes from the late abort. 
+ expect(sentMessages).toHaveLength(1); + }); + + test("removes abort listener from signal on timer timeout", async () => { + setup(); + + jest.useFakeTimers(); + try { + const controller = new AbortController(); + const spy = spySignal(controller.signal); + + const resultPromise = proxy.request( + { operation: "read", path: "/tmp/slow.txt" }, + "session-1", + spy.signal, + ); + + expect(spy.addCalls).toEqual(["abort"]); + expect(spy.removeCalls).toEqual([]); + + const requestId = (sentMessages[0] as Record) + .requestId as string; + expect(proxy.hasPendingRequest(requestId)).toBe(true); + + // Advance past the 30s internal timeout. + jest.advanceTimersByTime(31 * 1000); + + const result = await resultPromise; + expect(result.isError).toBe(true); + expect(result.content).toContain("Host file proxy timed out"); + expect(proxy.hasPendingRequest(requestId)).toBe(false); + + // Listener is detached after the timer fires. + expect(spy.removeCalls).toEqual(["abort"]); + + // Subsequent aborts should be harmless — no cancel emitted. + controller.abort(); + expect(sentMessages).toHaveLength(1); + } finally { + jest.useRealTimers(); + } + }); + }); + + describe("sender throws synchronously", () => { + test("rejects the promise, clears pending state and timer, invokes onInternalResolve", async () => { + const resolvedIds: string[] = []; + sentMessages = []; + sendToClient = () => { + throw new Error("transport down"); + }; + proxy = new HostFileProxy(sendToClient, (id) => resolvedIds.push(id)); + + const resultPromise = proxy.request( + { operation: "read", path: "/tmp/test.txt" }, + "session-1", + ); + + await expect(resultPromise).rejects.toThrow("transport down"); + + // The internal resolve should fire exactly once as part of cleanup. + expect(resolvedIds).toHaveLength(1); + + // Issue a new request on a fresh (non-throwing) sender and verify + // the proxy is still functional — no stale timers or bookkeeping + // from the failed request. 
+ sentMessages = []; + proxy.updateSender((msg) => sentMessages.push(msg), true); + const okPromise = proxy.request( + { operation: "read", path: "/tmp/ok.txt" }, + "session-1", + ); + expect(sentMessages).toHaveLength(1); + const okRequestId = (sentMessages[0] as Record) + .requestId as string; + expect(proxy.hasPendingRequest(okRequestId)).toBe(true); + proxy.resolve(okRequestId, { content: "ok", isError: false }); + const okResult = await okPromise; + expect(okResult.content).toBe("ok"); + expect(okResult.isError).toBe(false); + }); + }); + describe("onInternalResolve callback", () => { test("fires on abort", async () => { const resolvedIds: string[] = []; diff --git a/assistant/src/browser-extension-relay/server.ts b/assistant/src/browser-extension-relay/server.ts index b6d35c2aa11..5ccec8e0d83 100644 --- a/assistant/src/browser-extension-relay/server.ts +++ b/assistant/src/browser-extension-relay/server.ts @@ -28,6 +28,14 @@ interface PendingCommand { export interface BrowserRelayWebSocketData { wsType: "browser-relay"; connectionId: string; + /** + * Guardian identity derived from the JWT claims at WebSocket upgrade + * time. Used by the ChromeExtensionRegistry (runtime/) to route + * host_browser_request frames to the correct extension. Undefined when + * HTTP auth is disabled (dev bypass) or when the token's sub cannot be + * parsed into an actor principal. 
+ */ + guardianId?: string; } export interface ExtensionRelayStatus { diff --git a/assistant/src/browser-session/__tests__/manager.test.ts b/assistant/src/browser-session/__tests__/manager.test.ts new file mode 100644 index 00000000000..a297a980285 --- /dev/null +++ b/assistant/src/browser-session/__tests__/manager.test.ts @@ -0,0 +1,170 @@ +import { describe, expect, test } from "bun:test"; + +import { + type BrowserBackend, + BrowserSessionManager, + type CdpCommand, + type CdpResult, + createExtensionBackend, +} from "../index.js"; + +interface MockBackendState { + available: boolean; + disposed: boolean; + lastCommand?: CdpCommand; + lastSignal?: AbortSignal; + sendImpl?: (command: CdpCommand, signal?: AbortSignal) => Promise; +} + +function createMockExtensionBackend(state: MockBackendState): BrowserBackend { + return createExtensionBackend({ + isAvailable: () => state.available, + sendCdp: async (command, signal) => { + state.lastCommand = command; + state.lastSignal = signal; + if (state.sendImpl) return state.sendImpl(command, signal); + return { result: { ok: true } }; + }, + dispose: () => { + state.disposed = true; + }, + }); +} + +describe("BrowserSessionManager", () => { + test("selectBackend throws when no backend is available", () => { + const state: MockBackendState = { available: false, disposed: false }; + const manager = new BrowserSessionManager({ + backends: [createMockExtensionBackend(state)], + }); + expect(() => manager.selectBackend()).toThrow( + "No available browser backend", + ); + }); + + test("selectBackend returns the extension backend when available", () => { + const state: MockBackendState = { available: true, disposed: false }; + const backend = createMockExtensionBackend(state); + const manager = new BrowserSessionManager({ backends: [backend] }); + const selected = manager.selectBackend(); + expect(selected.kind).toBe("extension"); + expect(selected).toBe(backend); + }); + + test("createSession returns a session with a new uuid 
stored in the map", () => { + const state: MockBackendState = { available: true, disposed: false }; + const manager = new BrowserSessionManager({ + backends: [createMockExtensionBackend(state)], + }); + const session = manager.createSession(); + expect(session.id).toBeTruthy(); + expect(session.backendKind).toBe("extension"); + // Lookup round-trips. + expect(manager.getSession(session.id)).toEqual(session); + // Two sessions get unique ids. + const another = manager.createSession(); + expect(another.id).not.toBe(session.id); + }); + + test("send delegates to backend.send and returns the CDP result", async () => { + const expectedResult: CdpResult = { result: { value: 42 } }; + const state: MockBackendState = { + available: true, + disposed: false, + sendImpl: async () => expectedResult, + }; + const manager = new BrowserSessionManager({ + backends: [createMockExtensionBackend(state)], + }); + const result = await manager.send(undefined, { + method: "Browser.getVersion", + params: { foo: "bar" }, + }); + expect(result).toEqual(expectedResult); + expect(state.lastCommand).toEqual({ + method: "Browser.getVersion", + params: { foo: "bar" }, + }); + }); + + test("send with an aborted signal propagates the abort", async () => { + const state: MockBackendState = { + available: true, + disposed: false, + sendImpl: async (_command, signal) => { + if (signal?.aborted) { + throw new Error("aborted"); + } + return { result: { ok: true } }; + }, + }; + const manager = new BrowserSessionManager({ + backends: [createMockExtensionBackend(state)], + }); + const controller = new AbortController(); + controller.abort(); + await expect( + manager.send( + undefined, + { method: "Browser.getVersion" }, + controller.signal, + ), + ).rejects.toThrow("aborted"); + expect(state.lastSignal).toBe(controller.signal); + }); + + test("disposeAll calls backend.dispose and clears the session map", () => { + const state: MockBackendState = { available: true, disposed: false }; + const manager = 
new BrowserSessionManager({ + backends: [createMockExtensionBackend(state)], + }); + const session = manager.createSession(); + expect(manager.getSession(session.id)).toBeDefined(); + manager.disposeAll(); + expect(state.disposed).toBe(true); + expect(manager.getSession(session.id)).toBeUndefined(); + }); + + test("send with a known sessionId routes through the matching backend", async () => { + const expectedResult: CdpResult = { result: { routed: true } }; + const state: MockBackendState = { + available: true, + disposed: false, + sendImpl: async () => expectedResult, + }; + const manager = new BrowserSessionManager({ + backends: [createMockExtensionBackend(state)], + }); + const session = manager.createSession(); + const result = await manager.send(session.id, { + method: "Browser.getVersion", + }); + expect(result).toEqual(expectedResult); + expect(state.lastCommand).toEqual({ method: "Browser.getVersion" }); + }); + + test("send with an unknown sessionId throws", async () => { + const state: MockBackendState = { available: true, disposed: false }; + const manager = new BrowserSessionManager({ + backends: [createMockExtensionBackend(state)], + }); + await expect( + manager.send("does-not-exist", { method: "Browser.getVersion" }), + ).rejects.toThrow("Unknown browser session: does-not-exist"); + // The mock backend should not have received the command. 
+ expect(state.lastCommand).toBeUndefined(); + }); + + test("send with a sessionId of a disposed session throws", async () => { + const state: MockBackendState = { available: true, disposed: false }; + const manager = new BrowserSessionManager({ + backends: [createMockExtensionBackend(state)], + }); + const session = manager.createSession(); + manager.disposeSession(session.id); + await expect( + manager.send(session.id, { method: "Browser.getVersion" }), + ).rejects.toThrow(`Unknown browser session: ${session.id}`); + expect(state.lastCommand).toBeUndefined(); + }); +}); diff --git a/assistant/src/browser-session/backends/extension.ts b/assistant/src/browser-session/backends/extension.ts new file mode 100644 index 00000000000..b38250ca8b1 --- /dev/null +++ b/assistant/src/browser-session/backends/extension.ts @@ -0,0 +1,25 @@ +import type { BrowserBackend, CdpCommand, CdpResult } from "../types.js"; + +/** + * Extension backend stub. Phase 2 Wave 2 will wire this to the runtime's + * chrome-extension WebSocket connection registry. For now this is a pure + * interface implementation so BrowserSessionManager and its tests can be + * written without depending on runtime internals. + */ +export interface ExtensionBackendDeps { + /** Sends a CDP command to an attached chrome extension and returns the CDP result. 
*/ + sendCdp(command: CdpCommand, signal?: AbortSignal): Promise; + isAvailable(): boolean; + dispose(): void; +} + +export function createExtensionBackend( + deps: ExtensionBackendDeps, +): BrowserBackend { + return { + kind: "extension", + isAvailable: deps.isAvailable, + send: deps.sendCdp, + dispose: deps.dispose, + }; +} diff --git a/assistant/src/browser-session/index.ts b/assistant/src/browser-session/index.ts new file mode 100644 index 00000000000..5429e364594 --- /dev/null +++ b/assistant/src/browser-session/index.ts @@ -0,0 +1,3 @@ +export * from "./backends/extension.js"; +export * from "./manager.js"; +export * from "./types.js"; diff --git a/assistant/src/browser-session/manager.ts b/assistant/src/browser-session/manager.ts new file mode 100644 index 00000000000..5ba3734557d --- /dev/null +++ b/assistant/src/browser-session/manager.ts @@ -0,0 +1,86 @@ +import { v4 as uuid } from "uuid"; + +import type { + BrowserBackend, + BrowserSession, + CdpCommand, + CdpResult, +} from "./types.js"; + +export interface BrowserSessionManagerOptions { + /** Ordered list of backends to try; first available wins. Phase 2 only has extension. */ + backends: BrowserBackend[]; +} + +export class BrowserSessionManager { + private backends: BrowserBackend[]; + private sessions = new Map(); + + constructor(opts: BrowserSessionManagerOptions) { + this.backends = opts.backends; + } + + /** Pick an available backend or throw. */ + selectBackend(): BrowserBackend { + const b = this.backends.find((x) => x.isAvailable()); + if (!b) throw new Error("No available browser backend"); + return b; + } + + createSession(): BrowserSession { + const backend = this.selectBackend(); + const session: BrowserSession = { id: uuid(), backendKind: backend.kind }; + this.sessions.set(session.id, session); + return session; + } + + getSession(id: string): BrowserSession | undefined { + return this.sessions.get(id); + } + + /** + * Dispatch a CDP command. 
+ * + * - If `sessionId` is provided, the session must exist in the manager; otherwise this throws. + * The command is routed through the backend whose `kind` matches the session's `backendKind`, + * ensuring per-session backend isolation and making `disposeSession()` an actual enforcement + * boundary against stale ids. + * - If `sessionId` is `undefined`, the first available backend is selected (legacy advisory + * behavior used for one-off commands without a session handle). + * + * Phase 2 only has the extension backend so routing is effectively a no-op, but Phase 4 will + * rely on this contract once multi-backend / multi-tab multiplexing lands. + */ + async send( + sessionId: string | undefined, + command: CdpCommand, + signal?: AbortSignal, + ): Promise { + let backend: BrowserBackend; + if (sessionId !== undefined) { + const session = this.sessions.get(sessionId); + if (!session) { + throw new Error(`Unknown browser session: ${sessionId}`); + } + const matched = this.backends.find((b) => b.kind === session.backendKind); + if (!matched) { + throw new Error( + `No backend available for session kind: ${session.backendKind}`, + ); + } + backend = matched; + } else { + backend = this.selectBackend(); + } + return backend.send(command, signal); + } + + disposeSession(id: string): void { + this.sessions.delete(id); + } + + disposeAll(): void { + for (const b of this.backends) b.dispose(); + this.sessions.clear(); + } +} diff --git a/assistant/src/browser-session/types.ts b/assistant/src/browser-session/types.ts new file mode 100644 index 00000000000..e036e42c780 --- /dev/null +++ b/assistant/src/browser-session/types.ts @@ -0,0 +1,28 @@ +export type BrowserBackendKind = "extension"; // Phase 4/5 will add "cdp-inspect", "playwright". + +export interface CdpCommand { + method: string; + params?: Record; + sessionId?: string; +} + +export interface CdpResult { + /** Raw CDP result object; opaque to the manager. 
*/ + result?: unknown; + /** CDP error envelope if the command failed. */ + error?: { code: number; message: string; data?: unknown }; +} + +export interface BrowserSession { + id: string; + backendKind: BrowserBackendKind; + /** Opaque target/sessionId from the backend. Omitted for "most-recent-tab" commands. */ + targetId?: string; +} + +export interface BrowserBackend { + kind: BrowserBackendKind; + isAvailable(): boolean; + send(command: CdpCommand, signal?: AbortSignal): Promise; + dispose(): void; +} diff --git a/assistant/src/channels/__tests__/types.test.ts b/assistant/src/channels/__tests__/types.test.ts new file mode 100644 index 00000000000..33bfeeb7c6a --- /dev/null +++ b/assistant/src/channels/__tests__/types.test.ts @@ -0,0 +1,134 @@ +import { describe, expect, test } from "bun:test"; + +import { + INTERACTIVE_INTERFACES, + INTERFACE_IDS, + isInterfaceId, + supportsHostProxy, +} from "../types.js"; + +describe("INTERFACE_IDS", () => { + test("includes chrome-extension", () => { + expect( + (INTERFACE_IDS as readonly string[]).includes("chrome-extension"), + ).toBe(true); + }); + + test("still includes macos and other existing interfaces", () => { + for (const id of [ + "macos", + "ios", + "cli", + "telegram", + "phone", + "vellum", + "whatsapp", + "slack", + "email", + ]) { + expect((INTERFACE_IDS as readonly string[]).includes(id)).toBe(true); + } + }); +}); + +describe("INTERACTIVE_INTERFACES", () => { + test("does NOT include chrome-extension", () => { + // Chrome extensions don't render SSE-backed prompter UI, so they must + // stay out of the interactive set even though they have an InterfaceId. 
+ expect(INTERACTIVE_INTERFACES.has("chrome-extension" as never)).toBe(false); + }); + + test("still includes macos", () => { + expect(INTERACTIVE_INTERFACES.has("macos")).toBe(true); + }); +}); + +describe("isInterfaceId", () => { + test("returns true for chrome-extension", () => { + expect(isInterfaceId("chrome-extension")).toBe(true); + }); + + test("returns true for macos", () => { + expect(isInterfaceId("macos")).toBe(true); + }); + + test("returns false for unknown interface", () => { + expect(isInterfaceId("safari-extension")).toBe(false); + }); +}); + +describe("supportsHostProxy", () => { + // ── macOS: supports every capability, and the no-arg form returns true. ── + test("macos returns true (no capability)", () => { + expect(supportsHostProxy("macos")).toBe(true); + }); + + test("macos returns true for host_bash", () => { + expect(supportsHostProxy("macos", "host_bash")).toBe(true); + }); + + test("macos returns true for host_file", () => { + expect(supportsHostProxy("macos", "host_file")).toBe(true); + }); + + test("macos returns true for host_cu", () => { + expect(supportsHostProxy("macos", "host_cu")).toBe(true); + }); + + test("macos returns true for host_browser", () => { + expect(supportsHostProxy("macos", "host_browser")).toBe(true); + }); + + // ── chrome-extension: only host_browser. ── + test("chrome-extension returns false (no capability)", () => { + // Chrome extension does not support "any host proxy at all" — it only + // supports host_browser, so the no-arg form must return false to keep + // existing call sites that guard desktop-only behavior unchanged. 
+ expect(supportsHostProxy("chrome-extension")).toBe(false); + }); + + test("chrome-extension returns true for host_browser", () => { + expect(supportsHostProxy("chrome-extension", "host_browser")).toBe(true); + }); + + test("chrome-extension returns false for host_bash", () => { + expect(supportsHostProxy("chrome-extension", "host_bash")).toBe(false); + }); + + test("chrome-extension returns false for host_file", () => { + expect(supportsHostProxy("chrome-extension", "host_file")).toBe(false); + }); + + test("chrome-extension returns false for host_cu", () => { + expect(supportsHostProxy("chrome-extension", "host_cu")).toBe(false); + }); + + // ── Non-supporting interfaces: false in all forms. ── + test("cli returns false (no capability)", () => { + expect(supportsHostProxy("cli")).toBe(false); + }); + + test("cli returns false for host_bash", () => { + expect(supportsHostProxy("cli", "host_bash")).toBe(false); + }); + + test("cli returns false for host_browser", () => { + expect(supportsHostProxy("cli", "host_browser")).toBe(false); + }); + + test("telegram returns false (no capability)", () => { + expect(supportsHostProxy("telegram")).toBe(false); + }); + + test("telegram returns false for host_browser", () => { + expect(supportsHostProxy("telegram", "host_browser")).toBe(false); + }); + + test("vellum returns false (no capability)", () => { + expect(supportsHostProxy("vellum")).toBe(false); + }); + + test("email returns false for host_browser", () => { + expect(supportsHostProxy("email", "host_browser")).toBe(false); + }); +}); diff --git a/assistant/src/channels/types.ts b/assistant/src/channels/types.ts index ec9e8bf6d2e..fd3665e7018 100644 --- a/assistant/src/channels/types.ts +++ b/assistant/src/channels/types.ts @@ -48,6 +48,7 @@ export const INTERFACE_IDS = [ "whatsapp", "slack", "email", + "chrome-extension", ] as const; export type InterfaceId = (typeof INTERFACE_IDS)[number]; @@ -90,9 +91,37 @@ export function isInteractiveInterface(id: InterfaceId): 
boolean { return INTERACTIVE_INTERFACES.has(id); } -/** Whether the interface supports host proxies (bash, file, computer-use). */ -export function supportsHostProxy(id: InterfaceId): boolean { - return id === "macos"; +/** + * Host proxy capabilities that an interface can support. The macOS client + * supports all four; the chrome-extension interface only supports + * host_browser (via the Chrome DevTools Protocol proxy). + */ +export type HostProxyCapability = + | "host_bash" + | "host_file" + | "host_cu" + | "host_browser"; + +/** + * Whether the interface supports a host proxy capability. + * + * The no-arg form `supportsHostProxy(id)` asks "does this interface support + * the full desktop host proxy set?" — it returns `true` only for macOS, which + * supports all four capabilities. It returns `false` for + * chrome-extension because chrome-extension only supports `host_browser`, + * and the no-arg form is the gate that legacy desktop-only call sites use + * (e.g. preactivating computer-use, restoring all four proxies in the drain + * queue). Callers that want to check a single capability — for example, to + * decide whether to keep `hostBrowserProxy` available for chrome-extension — + * should pass the capability explicitly: `supportsHostProxy(id, "host_browser")`. + */ +export function supportsHostProxy( + id: InterfaceId, + capability?: HostProxyCapability, +): boolean { + if (id === "macos") return true; + if (id === "chrome-extension" && capability === "host_browser") return true; + return false; } export interface TurnInterfaceContext { diff --git a/assistant/src/daemon/__tests__/conversation-tool-setup.test.ts b/assistant/src/daemon/__tests__/conversation-tool-setup.test.ts new file mode 100644 index 00000000000..18d3cfcbd31 --- /dev/null +++ b/assistant/src/daemon/__tests__/conversation-tool-setup.test.ts @@ -0,0 +1,167 @@ +/** + * Tests for `isToolActiveForContext` host-tool capability gating. 
+ * + * Two scenarios are verified: + * - chrome-extension is its own executor and is exempt from the hasNoClient + * gate (the extension's own popup UI gates commands; there is no SSE + * interactive approval channel, and chrome-extension turns intentionally + * run with `hasNoClient: true` because chrome-extension is not in + * `INTERACTIVE_INTERFACES`). + * - macos still requires a connected SSE client for interactive approval, so + * `hasNoClient: true` continues to deny all host tools on macos. + * + * The per-capability check (`supportsHostProxy(transport, capability)`) runs + * first and is authoritative for structural support, so host_bash and + * host_file_* are filtered out for chrome-extension regardless of the + * hasNoClient flag. + */ + +import { describe, expect, test } from "bun:test"; + +import type { SkillProjectionCache } from "../conversation-skill-tools.js"; +import { + isToolActiveForContext, + type SkillProjectionContext, +} from "../conversation-tool-setup.js"; + +function makeCtx( + overrides: Partial = {}, +): SkillProjectionContext { + return { + skillProjectionState: new Map(), + skillProjectionCache: {} as SkillProjectionCache, + coreToolNames: new Set(), + toolsDisabledDepth: 0, + ...overrides, + }; +} + +describe("isToolActiveForContext — host tool capability gating", () => { + // macOS transport: SSE-based interactive approval required. + test("host_bash is active for macOS with a connected client", () => { + expect( + isToolActiveForContext( + "host_bash", + makeCtx({ hasNoClient: false, transportInterface: "macos" }), + ), + ).toBe(true); + }); + + test("host_bash is NOT active for macOS when hasNoClient is true (security invariant)", () => { + // macOS uses an SSE-based interactive approval channel. Without a + // connected client the guardian auto-approve path could execute host + // commands unattended, so host tools must be denied. 
+ expect( + isToolActiveForContext( + "host_bash", + makeCtx({ hasNoClient: true, transportInterface: "macos" }), + ), + ).toBe(false); + }); + + test("host_file_read is NOT active for macOS when hasNoClient is true", () => { + expect( + isToolActiveForContext( + "host_file_read", + makeCtx({ hasNoClient: true, transportInterface: "macos" }), + ), + ).toBe(false); + }); + + test("host_browser is active for macOS with a connected client", () => { + expect( + isToolActiveForContext( + "host_browser", + makeCtx({ hasNoClient: false, transportInterface: "macos" }), + ), + ).toBe(true); + }); + + test("host_browser is NOT active for macOS when hasNoClient is true", () => { + // macOS requires a client for any host tool — the SSE interactive + // approval channel must be available regardless of capability. + expect( + isToolActiveForContext( + "host_browser", + makeCtx({ hasNoClient: true, transportInterface: "macos" }), + ), + ).toBe(false); + }); + + // chrome-extension transport: the extension is its own executor. + test("host_browser is active for chrome-extension even when hasNoClient is true", () => { + // chrome-extension turns run with `hasNoClient: true` by design because + // chrome-extension is not in `INTERACTIVE_INTERFACES` — it is not an + // SSE interactive channel. The extension gates host_browser commands + // via its own popup UI, so the hasNoClient gate must not filter + // host_browser out for chrome-extension transports. 
+ expect( + isToolActiveForContext( + "host_browser", + makeCtx({ + hasNoClient: true, + transportInterface: "chrome-extension", + }), + ), + ).toBe(true); + }); + + test("host_browser is active for chrome-extension when hasNoClient is false", () => { + expect( + isToolActiveForContext( + "host_browser", + makeCtx({ + hasNoClient: false, + transportInterface: "chrome-extension", + }), + ), + ).toBe(true); + }); + + test("host_bash is NOT active for chrome-extension even when hasNoClient is true", () => { + // The per-capability check runs first and is authoritative: chrome-extension + // only supports `host_browser`, so `host_bash` must be filtered out. + expect( + isToolActiveForContext( + "host_bash", + makeCtx({ + hasNoClient: true, + transportInterface: "chrome-extension", + }), + ), + ).toBe(false); + }); + + test("host_file_read is NOT active for chrome-extension when hasNoClient is true", () => { + expect( + isToolActiveForContext( + "host_file_read", + makeCtx({ + hasNoClient: true, + transportInterface: "chrome-extension", + }), + ), + ).toBe(false); + }); + + // Backwards-compat fallback: no transport plumbed through. + test("host_bash falls back to hasNoClient gate when transport is undefined (client connected)", () => { + // Without a transport interface we cannot run the per-capability check, + // so we fall back to the coarse-grained `hasNoClient` behavior. 
+ expect( + isToolActiveForContext( + "host_bash", + makeCtx({ hasNoClient: false, transportInterface: undefined }), + ), + ).toBe(true); + }); + + test("host_bash falls back to hasNoClient gate when transport is undefined (no client)", () => { + expect( + isToolActiveForContext( + "host_bash", + makeCtx({ hasNoClient: true, transportInterface: undefined }), + ), + ).toBe(false); + }); +}); diff --git a/assistant/src/daemon/conversation-process.ts b/assistant/src/daemon/conversation-process.ts index 229c1d880f8..77fddbdd9db 100644 --- a/assistant/src/daemon/conversation-process.ts +++ b/assistant/src/daemon/conversation-process.ts @@ -136,6 +136,8 @@ export interface ProcessConversationContext { clearProxyAvailability(): void; /** Restore host proxy availability based on whether a real client is connected. */ restoreProxyAvailability(): void; + /** Restore only the host browser proxy (used by chrome-extension drains). */ + restoreBrowserProxyAvailability(): void; emitActivityState( phase: | "idle" @@ -311,10 +313,27 @@ export async function drainQueue( // returns false and tool execution falls back to local. if (next.isInteractive === false) { conversation.clearProxyAvailability(); + // chrome-extension is non-interactive (no SSE prompter UI) but DOES have + // a connected client that can service host_browser_request events. The + // unconditional clear above turned its hostBrowserProxy off; restore it + // here so the queued turn can still drive the browser via CDP. + const drainInterfaceCtx = + queuedInterfaceCtx ?? conversation.getTurnInterfaceContext(); + const drainInterface = drainInterfaceCtx?.userMessageInterface; + if ( + drainInterface && + !supportsHostProxy(drainInterface) && + supportsHostProxy(drainInterface, "host_browser") + ) { + conversation.restoreBrowserProxyAvailability(); + } } else { // Restore proxy availability only for desktop-originating turns (macos) // in case a prior non-interactive drain disabled it. 
Non-desktop interactive - // interfaces (CLI, Vellum) should not re-enable desktop host proxies. + // interfaces (CLI, Vellum) should not re-enable desktop host proxies. The + // chrome-extension interface only supports host_browser, not the desktop + // proxies or computer-use, so it is excluded by the no-arg form of + // supportsHostProxy (which returns false for chrome-extension). const interfaceCtx = queuedInterfaceCtx ?? conversation.getTurnInterfaceContext(); const sourceInterface = interfaceCtx?.userMessageInterface; diff --git a/assistant/src/daemon/conversation-tool-setup.ts b/assistant/src/daemon/conversation-tool-setup.ts index f6f163528e2..fa360ed675f 100644 --- a/assistant/src/daemon/conversation-tool-setup.ts +++ b/assistant/src/daemon/conversation-tool-setup.ts @@ -6,6 +6,11 @@ * keeping the constructor body focused on wiring. */ +import { + type HostProxyCapability, + type InterfaceId, + supportsHostProxy, +} from "../channels/types.js"; import { isHttpAuthDisabled } from "../config/env.js"; import { getIsPlatform } from "../config/env-registry.js"; import type { CesClient } from "../credential-execution/client.js"; @@ -459,6 +464,15 @@ export interface SkillProjectionContext { subagentAllowedTools?: Set; /** True when this conversation belongs to a subagent spawned by SubagentManager. */ readonly isSubagent?: boolean; + /** + * The interface id of the connected client driving the current turn (e.g. + * "macos", "chrome-extension"). Used to gate host tools by per-capability + * `supportsHostProxy(transport, capability)` so that interfaces which only + * support a subset of the host proxy set (e.g. chrome-extension supports + * `host_browser` but not `host_bash`/`host_file`) do not leak unsupported + * host tools into the LLM tool definitions. 
+ */ + readonly transportInterface?: InterfaceId; } // ── Conditional tool sets ──────────────────────────────────────────── @@ -469,6 +483,25 @@ const HOST_TOOL_NAMES = new Set([ "host_file_write", "host_file_edit", "host_bash", + "host_browser", +]); +/** + * Maps each host tool name to the host proxy capability that the connected + * client interface must support. `isToolActiveForContext` uses this to gate + * each host tool individually so that partial-capability transports (e.g. + * chrome-extension only supports `host_browser`) only see the host tools + * their interface can actually service. + * + * Note: there is no `host_cu` tool exposed via the tool gating layer today; + * computer-use is preactivated as a skill and projected through the skill + * tools path. Only the host tools listed in `HOST_TOOL_NAMES` need entries. + */ +const HOST_TOOL_TO_CAPABILITY = new Map([ + ["host_bash", "host_bash"], + ["host_file_read", "host_file"], + ["host_file_write", "host_file"], + ["host_file_edit", "host_file"], + ["host_browser", "host_browser"], ]); const CLIENT_CAPABILITY_TOOL_NAMES = new Set(["app_open"]); const PLATFORM_TOOL_NAMES = new Set(["request_system_permission"]); @@ -498,9 +531,27 @@ export function isToolActiveForContext( return ctx.channelCapabilities?.supportsDynamicUi ?? !ctx.hasNoClient; } if (HOST_TOOL_NAMES.has(name)) { - // Host tools require a connected client — without one, there is no human - // to approve execution and the guardian auto-approve path would allow - // unchecked host command execution on the daemon host. + const capability = HOST_TOOL_TO_CAPABILITY.get(name); + const transport = ctx.transportInterface; + + // Per-capability check is authoritative for structural support: if the + // transport cannot service this capability, the tool is filtered out. 
+ if (transport && capability && !supportsHostProxy(transport, capability)) { + return false; + } + + // chrome-extension is its own executor — the extension's popup gates + // commands via its own UI, and the transport does not use an SSE-level + // interactive approval channel. hasNoClient is intentionally `true` for + // chrome-extension turns (chrome-extension is not in INTERACTIVE_INTERFACES) + // and must not gate host_browser. Trust the per-capability check. + if (transport === "chrome-extension") { + return true; + } + + // For transports that surface approvals over SSE (macos, backwards-compat + // fallback), deny when no client is present so the guardian auto-approve + // path cannot execute host commands unattended. return !ctx.hasNoClient; } if (CLIENT_CAPABILITY_TOOL_NAMES.has(name)) { diff --git a/assistant/src/daemon/conversation.ts b/assistant/src/daemon/conversation.ts index 8f16f6a75f4..3b7bb564f4e 100644 --- a/assistant/src/daemon/conversation.ts +++ b/assistant/src/daemon/conversation.ts @@ -18,6 +18,7 @@ import type { ResolvedSystemPrompt } from "../agent/loop.js"; import { AgentLoop } from "../agent/loop.js"; import type { + InterfaceId, TurnChannelContext, TurnInterfaceContext, } from "../channels/types.js"; @@ -184,6 +185,19 @@ export class Conversation { /** @internal */ hostBrowserProxy?: HostBrowserProxy; /** @internal */ hostCuProxy?: HostCuProxy; /** @internal */ hostFileProxy?: HostFileProxy; + /** + * Optional override sender used by `restoreBrowserProxyAvailability` so + * non-SSE transports (e.g. chrome-extension, whose host_browser_request + * frames flow through the ChromeExtensionRegistry WebSocket rather than + * the SSE hub) can preserve their registry-routed sender across drain + * queue restores. When set, `restoreBrowserProxyAvailability()` uses this + * function instead of `sendToClient` so the drain-queue path doesn't + * clobber the chrome-extension sender with the SSE hub emitter. 
+ * + * Populated by the POST /messages handler for chrome-extension turns and + * cleared when an unrelated interface takes over (see `updateClient`). + */ + /** @internal */ hostBrowserSenderOverride?: (msg: ServerMessage) => void; /** @internal */ cesClient?: CesClient; /** @internal */ readonly queue = new MessageQueue(); /** @internal */ currentActiveSurfaceId?: string; @@ -545,6 +559,37 @@ export class Conversation { } } + /** + * Restore host browser proxy availability only. Used for non-desktop + * interfaces (e.g. chrome-extension) that support host_browser but not + * the full desktop proxy set, so calling restoreProxyAvailability() would + * incorrectly re-enable bash/file/CU proxies that should stay disabled. + * + * Unlike `restoreProxyAvailability()`, this helper does NOT gate on + * `hasNoClient`. The chrome-extension interface is non-interactive (so + * `hasNoClient === true`), but it DOES have a connected client that can + * service `host_browser_request` events. Gating on `hasNoClient` would + * leave the just-constructed proxy unavailable and the only way to make + * it available would be to flip `hasNoClient` false, which would + * incorrectly enable host_bash/host_file/host_cu tool gating downstream. + * + * When `hostBrowserSenderOverride` is set, that function is used as the + * sender instead of `sendToClient`. This is required for the + * chrome-extension interface whose host_browser frames route through the + * ChromeExtensionRegistry WebSocket rather than the SSE hub: if the + * queue-drain path called this helper with `sendToClient`, the + * registry-routed sender established at turn-start would be clobbered by + * the SSE hub emitter and host_browser_request frames would stop + * reaching the extension. + * + * Callers must only invoke this when they know the current interface + * supports host_browser (see `supportsHostProxy(id, "host_browser")`). 
+ */ + restoreBrowserProxyAvailability(): void { + const sender = this.hostBrowserSenderOverride ?? this.sendToClient; + this.hostBrowserProxy?.updateSender(sender, true); + } + setSubagentAllowedTools(tools: Set | undefined): void { this.subagentAllowedTools = tools; } @@ -1046,6 +1091,16 @@ export class Conversation { return this.currentTurnInterfaceContext; } + /** + * Implements the `transportInterface` field of `SkillProjectionContext` so + * that `isToolActiveForContext` can gate host tools by per-capability + * `supportsHostProxy(transport, capability)`. Derived from the live turn + * interface context so it tracks the connected client across turns. + */ + get transportInterface(): InterfaceId | undefined { + return this.currentTurnInterfaceContext?.userMessageInterface; + } + async persistUserMessage( content: string, attachments: UserMessageAttachment[], diff --git a/assistant/src/daemon/handlers/conversations.ts b/assistant/src/daemon/handlers/conversations.ts index 9615e941b19..727de4d270c 100644 --- a/assistant/src/daemon/handlers/conversations.ts +++ b/assistant/src/daemon/handlers/conversations.ts @@ -306,22 +306,29 @@ export async function handleConversationCreate( userMessageInterface: transportInterface, assistantMessageInterface: transportInterface, }); - // Only create the host bash proxy for desktop client interfaces that can - // execute commands on the user's machine. Set before updateClient so - // updateClient's call to hostBashProxy.updateSender targets the new proxy. - if (supportsHostProxy(transportInterface)) { + // Only create each host proxy for interfaces that support the matching + // capability. macOS supports all four; the chrome-extension interface only + // supports host_browser. Set before updateClient so updateClient's call to + // hostBashProxy.updateSender targets the new proxy. 
+ if (supportsHostProxy(transportInterface, "host_bash")) { const proxy = new HostBashProxy(sendEvent, (requestId) => { pendingInteractions.resolve(requestId); }); conversationObj.setHostBashProxy(proxy); + } + if (supportsHostProxy(transportInterface, "host_browser")) { const browserProxy = new HostBrowserProxy(sendEvent, (requestId) => { pendingInteractions.resolve(requestId); }); conversationObj.setHostBrowserProxy(browserProxy); + } + if (supportsHostProxy(transportInterface, "host_file")) { const fileProxy = new HostFileProxy(sendEvent, (requestId) => { pendingInteractions.resolve(requestId); }); conversationObj.setHostFileProxy(fileProxy); + } + if (supportsHostProxy(transportInterface, "host_cu")) { const cuProxy = new HostCuProxy(sendEvent, (requestId) => { pendingInteractions.resolve(requestId); }); diff --git a/assistant/src/daemon/host-bash-proxy.ts b/assistant/src/daemon/host-bash-proxy.ts index 9f1d4a3706f..6345e5424ca 100644 --- a/assistant/src/daemon/host-bash-proxy.ts +++ b/assistant/src/daemon/host-bash-proxy.ts @@ -14,6 +14,8 @@ interface PendingRequest { reject: (err: Error) => void; timer: ReturnType; timeoutSec: number; + /** Detach the abort listener from the caller's signal. No-op when no signal was passed. */ + detachAbort: () => void; } export class HostBashProxy { @@ -60,8 +62,14 @@ export class HostBashProxy { const timeoutSec = input.timeout_seconds ?? shellMaxTimeoutSec; // Proxy timeout: slightly after client-side timeout, but before executor's outer timeout const proxyTimeoutSec = timeoutSec + 3; + + // Declared up-front so onAbort (defined before detachAbort is assigned) + // can close over a stable reference once it's wired below. 
+ let detachAbort: () => void = () => {}; + const timer = setTimeout(() => { this.pending.delete(requestId); + detachAbort(); this.onInternalResolve?.(requestId); log.warn( { requestId, command: input.command }, @@ -78,13 +86,14 @@ export class HostBashProxy { ); }, proxyTimeoutSec * 1000); - this.pending.set(requestId, { resolve, reject, timer, timeoutSec }); - if (signal) { const onAbort = () => { if (this.pending.has(requestId)) { clearTimeout(timer); this.pending.delete(requestId); + // Abort fired — nothing to detach, but call the no-op for symmetry + // so callers can rely on detachAbort being idempotent. + detachAbort(); this.onInternalResolve?.(requestId); try { this.sendToClient({ @@ -98,19 +107,43 @@ export class HostBashProxy { } }; signal.addEventListener("abort", onAbort, { once: true }); + detachAbort = () => signal.removeEventListener("abort", onAbort); } - this.sendToClient({ - type: "host_bash_request", - requestId, - conversationId, - command: input.command, - working_dir: input.working_dir, - timeout_seconds: input.timeout_seconds, - ...(input.env && Object.keys(input.env).length > 0 - ? { env: input.env } - : {}), - } as ServerMessage); + this.pending.set(requestId, { + resolve, + reject, + timer, + timeoutSec, + detachAbort, + }); + + try { + this.sendToClient({ + type: "host_bash_request", + requestId, + conversationId, + command: input.command, + working_dir: input.working_dir, + timeout_seconds: input.timeout_seconds, + ...(input.env && Object.keys(input.env).length > 0 + ? { env: input.env } + : {}), + } as ServerMessage); + } catch (err) { + // Sender threw synchronously (e.g. client transport error during + // event emission). Clean up pending state and timer so we don't + // leak an in-flight entry that nothing will ever resolve. 
+ clearTimeout(timer); + this.pending.delete(requestId); + detachAbort(); + this.onInternalResolve?.(requestId); + log.warn( + { requestId, command: input.command, err }, + "Host bash proxy send failed", + ); + reject(err instanceof Error ? err : new Error(String(err))); + } }); } @@ -129,6 +162,7 @@ export class HostBashProxy { return; } clearTimeout(entry.timer); + entry.detachAbort(); this.pending.delete(requestId); const result = formatShellOutput( response.stdout, @@ -151,6 +185,7 @@ export class HostBashProxy { dispose(): void { for (const [requestId, entry] of this.pending) { clearTimeout(entry.timer); + entry.detachAbort(); this.onInternalResolve?.(requestId); try { this.sendToClient({ diff --git a/assistant/src/daemon/host-cu-proxy.ts b/assistant/src/daemon/host-cu-proxy.ts index c35cba30ad3..68b0adeb16d 100644 --- a/assistant/src/daemon/host-cu-proxy.ts +++ b/assistant/src/daemon/host-cu-proxy.ts @@ -57,6 +57,8 @@ interface PendingRequest { resolve: (result: ToolExecutionResult) => void; reject: (err: Error) => void; timer: ReturnType; + /** Detach the abort listener from the caller's signal. No-op when no signal was passed. */ + detachAbort: () => void; } // --------------------------------------------------------------------------- @@ -152,8 +154,13 @@ export class HostCuProxy { const requestId = uuid(); return new Promise((resolve, reject) => { + // Declared up-front so onAbort (defined before detachAbort is assigned) + // can close over a stable reference once it's wired below. 
+ let detachAbort: () => void = () => {}; + const timer = setTimeout(() => { this.pending.delete(requestId); + detachAbort(); this.onInternalResolve?.(requestId); log.warn({ requestId, toolName }, "Host CU proxy request timed out"); resolve({ @@ -162,13 +169,14 @@ export class HostCuProxy { }); }, REQUEST_TIMEOUT_SEC * 1000); - this.pending.set(requestId, { resolve, reject, timer }); - if (signal) { const onAbort = () => { if (this.pending.has(requestId)) { clearTimeout(timer); this.pending.delete(requestId); + // Abort fired — nothing to detach, but call the no-op for symmetry + // so callers can rely on detachAbort being idempotent. + detachAbort(); this.onInternalResolve?.(requestId); try { this.sendToClient({ @@ -182,17 +190,32 @@ export class HostCuProxy { } }; signal.addEventListener("abort", onAbort, { once: true }); + detachAbort = () => signal.removeEventListener("abort", onAbort); } - this.sendToClient({ - type: "host_cu_request", - requestId, - conversationId, - toolName, - input, - stepNumber, - reasoning, - } as ServerMessage); + this.pending.set(requestId, { resolve, reject, timer, detachAbort }); + + try { + this.sendToClient({ + type: "host_cu_request", + requestId, + conversationId, + toolName, + input, + stepNumber, + reasoning, + } as ServerMessage); + } catch (err) { + // Sender threw synchronously (e.g. client transport error during + // event emission). Clean up pending state and timer so we don't + // leak an in-flight entry that nothing will ever resolve. + clearTimeout(timer); + this.pending.delete(requestId); + detachAbort(); + this.onInternalResolve?.(requestId); + log.warn({ requestId, toolName, err }, "Host CU proxy send failed"); + reject(err instanceof Error ? 
err : new Error(String(err))); + } }); } @@ -203,6 +226,7 @@ export class HostCuProxy { return; } clearTimeout(entry.timer); + entry.detachAbort(); this.pending.delete(requestId); // Capture pre-update state so formatObservation sees the correct previous AX tree @@ -388,6 +412,7 @@ export class HostCuProxy { dispose(): void { for (const [requestId, entry] of this.pending) { clearTimeout(entry.timer); + entry.detachAbort(); this.onInternalResolve?.(requestId); try { this.sendToClient({ diff --git a/assistant/src/daemon/host-file-proxy.ts b/assistant/src/daemon/host-file-proxy.ts index d0d2a4062c4..21b2eaea3df 100644 --- a/assistant/src/daemon/host-file-proxy.ts +++ b/assistant/src/daemon/host-file-proxy.ts @@ -23,6 +23,8 @@ interface PendingRequest { resolve: (result: ToolExecutionResult) => void; reject: (err: Error) => void; timer: ReturnType; + /** Detach the abort listener from the caller's signal. No-op when no signal was passed. */ + detachAbort: () => void; } export class HostFileProxy { @@ -61,8 +63,14 @@ export class HostFileProxy { return new Promise((resolve, reject) => { // File operations should be fast — 30 second timeout. const timeoutSec = 30; + + // Declared up-front so onAbort (defined before detachAbort is assigned) + // can close over a stable reference once it's wired below. + let detachAbort: () => void = () => {}; + const timer = setTimeout(() => { this.pending.delete(requestId); + detachAbort(); this.onInternalResolve?.(requestId); log.warn( { requestId, operation: input.operation }, @@ -74,13 +82,14 @@ export class HostFileProxy { }); }, timeoutSec * 1000); - this.pending.set(requestId, { resolve, reject, timer }); - if (signal) { const onAbort = () => { if (this.pending.has(requestId)) { clearTimeout(timer); this.pending.delete(requestId); + // Abort fired — nothing to detach, but call the no-op for symmetry + // so callers can rely on detachAbort being idempotent. 
+ detachAbort(); this.onInternalResolve?.(requestId); try { this.sendToClient({ @@ -94,14 +103,32 @@ export class HostFileProxy { } }; signal.addEventListener("abort", onAbort, { once: true }); + detachAbort = () => signal.removeEventListener("abort", onAbort); } - this.sendToClient({ - ...input, - type: "host_file_request", - requestId, - conversationId, - } as ServerMessage); + this.pending.set(requestId, { resolve, reject, timer, detachAbort }); + + try { + this.sendToClient({ + ...input, + type: "host_file_request", + requestId, + conversationId, + } as ServerMessage); + } catch (err) { + // Sender threw synchronously (e.g. client transport error during + // event emission). Clean up pending state and timer so we don't + // leak an in-flight entry that nothing will ever resolve. + clearTimeout(timer); + this.pending.delete(requestId); + detachAbort(); + this.onInternalResolve?.(requestId); + log.warn( + { requestId, operation: input.operation, err }, + "Host file proxy send failed", + ); + reject(err instanceof Error ? 
err : new Error(String(err))); + } }); } @@ -115,6 +142,7 @@ export class HostFileProxy { return; } clearTimeout(entry.timer); + entry.detachAbort(); this.pending.delete(requestId); entry.resolve({ content: response.content, isError: response.isError }); } @@ -130,6 +158,7 @@ export class HostFileProxy { dispose(): void { for (const [requestId, entry] of this.pending) { clearTimeout(entry.timer); + entry.detachAbort(); this.onInternalResolve?.(requestId); try { this.sendToClient({ diff --git a/assistant/src/daemon/lifecycle.ts b/assistant/src/daemon/lifecycle.ts index ff1bd30844d..de3808e896a 100644 --- a/assistant/src/daemon/lifecycle.ts +++ b/assistant/src/daemon/lifecycle.ts @@ -75,6 +75,11 @@ import { mintPairingBearerToken, resolveSigningKey, } from "../runtime/auth/token-service.js"; +import { + initCapabilityTokenSecret, + loadOrCreateCapabilityTokenSecret, + writeDaemonTokenFallback, +} from "../runtime/capability-tokens.js"; import { ensureVellumGuardianBinding } from "../runtime/guardian-vellum-migration.js"; import { RuntimeHttpServer } from "../runtime/http-server.js"; import { startScheduler } from "../schedule/scheduler.js"; @@ -270,6 +275,20 @@ export async function runDaemon(): Promise { const signingKey = resolveSigningKey(); initAuthSigningKey(signingKey); + // Load (or generate + persist) the capability-token HMAC secret used + // to mint scoped tokens for the chrome extension pair endpoint. + // Wrapped in try/catch so a disk failure here never blocks startup — + // tokens can still be minted using a lazy on-demand load inside the + // capability-tokens module. + try { + initCapabilityTokenSecret(loadOrCreateCapabilityTokenSecret()); + } catch (err) { + log.warn( + { err }, + "Failed to pre-load capability token secret — continuing startup (lazy load will handle subsequent calls)", + ); + } + // Pre-populate the feature flag cache from the gateway so all // subsequent sync isAssistantFeatureFlagEnabled() calls have data. 
// Fired non-blocking so a slow or unreachable gateway doesn't delay @@ -432,8 +451,11 @@ export async function runDaemon(): Promise { // Ensure a vellum guardian binding exists so the identity system works // without requiring a manual bootstrap step. + let localGuardianPrincipalId = "local"; try { - ensureVellumGuardianBinding(DAEMON_INTERNAL_ASSISTANT_ID); + localGuardianPrincipalId = ensureVellumGuardianBinding( + DAEMON_INTERNAL_ASSISTANT_ID, + ); } catch (err) { log.warn( { err }, @@ -441,6 +463,19 @@ export async function runDaemon(): Promise { ); } + // Write a dev-only fallback capability token to `~/.vellum/daemon-token` + // so developers can manually pair the chrome extension without the + // native messaging helper. Production pairing goes through + // `POST /v1/browser-extension-pair` via the native helper. + try { + writeDaemonTokenFallback(localGuardianPrincipalId); + } catch (err) { + log.warn( + { err }, + "Failed to write dev daemon-token fallback — continuing startup", + ); + } + try { syncUpdateBulletinOnStartup(); } catch (err) { diff --git a/assistant/src/daemon/server.ts b/assistant/src/daemon/server.ts index 17566c3770c..2daa4f19586 100644 --- a/assistant/src/daemon/server.ts +++ b/assistant/src/daemon/server.ts @@ -1090,13 +1090,14 @@ export class DaemonServer { options?.transport?.chatType, ), ); - // Only create the host bash proxy for desktop client interfaces that can - // execute commands on the user's machine. Non-desktop conversations (CLI, - // channels, headless) fall back to local execution. + // Only create each host proxy for interfaces that support the matching + // capability. macOS supports all four; the chrome-extension interface only + // supports host_browser. Non-desktop conversations (CLI, channels, headless) + // fall back to local execution. 
// Guard: don't replace an active proxy during concurrent turn races — // another request may have started processing between the isProcessing() // check above and the await on ensureActorScopedHistory(). - if (supportsHostProxy(resolvedInterface)) { + if (supportsHostProxy(resolvedInterface, "host_bash")) { if (!conversation.isProcessing() || !conversation.hostBashProxy) { conversation.setHostBashProxy( new HostBashProxy(conversation.getCurrentSender(), (requestId) => { @@ -1104,6 +1105,10 @@ export class DaemonServer { }), ); } + } else if (!conversation.isProcessing()) { + conversation.setHostBashProxy(undefined); + } + if (supportsHostProxy(resolvedInterface, "host_browser")) { if (!conversation.isProcessing() || !conversation.hostBrowserProxy) { conversation.setHostBrowserProxy( new HostBrowserProxy(conversation.getCurrentSender(), (requestId) => { @@ -1111,6 +1116,10 @@ export class DaemonServer { }), ); } + } else if (!conversation.isProcessing()) { + conversation.setHostBrowserProxy(undefined); + } + if (supportsHostProxy(resolvedInterface, "host_file")) { if (!conversation.isProcessing() || !conversation.hostFileProxy) { conversation.setHostFileProxy( new HostFileProxy(conversation.getCurrentSender(), (requestId) => { @@ -1118,6 +1127,10 @@ export class DaemonServer { }), ); } + } else if (!conversation.isProcessing()) { + conversation.setHostFileProxy(undefined); + } + if (supportsHostProxy(resolvedInterface, "host_cu")) { if (!conversation.isProcessing() || !conversation.hostCuProxy) { conversation.setHostCuProxy( new HostCuProxy(conversation.getCurrentSender(), (requestId) => { @@ -1127,9 +1140,6 @@ export class DaemonServer { } conversation.addPreactivatedSkillId("computer-use"); } else if (!conversation.isProcessing()) { - conversation.setHostBashProxy(undefined); - conversation.setHostBrowserProxy(undefined); - conversation.setHostFileProxy(undefined); conversation.setHostCuProxy(undefined); } conversation.setCommandIntent(options?.commandIntent ?? 
null); @@ -1208,8 +1218,25 @@ export class DaemonServer { } } : registrar; + // Non-interactive interfaces that still have a connected client capable + // of handling host_browser_request events (e.g. chrome-extension) need + // their hostBrowserProxy explicitly marked connected. The proxy + // constructor defaults clientConnected = false, so without an explicit + // sender update the chrome-extension proxy would be created and + // immediately unavailable. We do NOT call updateClient(onEvent, false) + // for that case, because flipping hasNoClient to false would also lift the + // host_bash/host_file/host_cu tool gating for an interface that can't + // service them. Instead, provision just the browser proxy's sender. + const persistInterfaceCtx = conversation.getTurnInterfaceContext(); + const persistInterface = persistInterfaceCtx?.userMessageInterface; if (options?.isInteractive === true) { conversation.updateClient(onEvent, false); + } else if ( + persistInterface && + !supportsHostProxy(persistInterface) && + supportsHostProxy(persistInterface, "host_browser") + ) { + conversation.hostBrowserProxy?.updateSender(onEvent, true); } conversation diff --git a/assistant/src/runtime/__tests__/browser-extension-pair-routes.test.ts b/assistant/src/runtime/__tests__/browser-extension-pair-routes.test.ts new file mode 100644 index 00000000000..d297bf575f3 --- /dev/null +++ b/assistant/src/runtime/__tests__/browser-extension-pair-routes.test.ts @@ -0,0 +1,399 @@ +/** + * Tests for the /v1/browser-extension-pair capability-token pair endpoint.
+ * + * Covers: + * - Method/host/origin enforcement (405, 403, 400, 401) + * - Successful mint on allowed origin (200) for both the preferred + * `extensionOrigin` body field and the legacy `origin` alias + * - `expiresAt` response field is an ISO 8601 string matching what the + * native messaging helper validates + * - IPv6 loopback `Host` header variants (bracketed and bare) are + * accepted + * - Issued token round-trips through `verifyHostBrowserCapability` + * - Tampered tokens fail verification + */ + +import { randomBytes } from "node:crypto"; +import { beforeEach, describe, expect, test } from "bun:test"; + +import { + resetCapabilityTokenSecretForTests, + setCapabilityTokenSecretForTests, + verifyHostBrowserCapability, +} from "../capability-tokens.js"; +import { + handleBrowserExtensionPair, + parseHostHeader, +} from "../routes/browser-extension-pair-routes.js"; + +// --------------------------------------------------------------------------- +// Test helpers +// --------------------------------------------------------------------------- + +type ServerWithRequestIP = { + requestIP( + req: Request, + ): { address: string; family: string; port: number } | null; +}; + +function mockServer(address: string): ServerWithRequestIP { + return { + requestIP: () => ({ address, family: "IPv4", port: 0 }), + }; +} + +const loopbackServer = mockServer("127.0.0.1"); +const lanPeerServer = mockServer("192.168.1.10"); +const publicPeerServer = mockServer("203.0.113.50"); + +function buildRequest( + options: { + method?: string; + body?: unknown; + host?: string | null; + origin?: string; + forwardedFor?: string; + rawBody?: string; + } = {}, +): Request { + const headers = new Headers(); + if (options.host !== null) { + headers.set("host", options.host ?? 
"127.0.0.1:8765"); + } + if (options.forwardedFor) { + headers.set("x-forwarded-for", options.forwardedFor); + } + let bodyStr: string | undefined; + if (options.rawBody !== undefined) { + bodyStr = options.rawBody; + headers.set("content-type", "application/json"); + } else if (options.body !== undefined) { + bodyStr = JSON.stringify(options.body); + headers.set("content-type", "application/json"); + } + return new Request("http://127.0.0.1:8765/v1/browser-extension-pair", { + method: options.method ?? "POST", + headers, + body: bodyStr, + }); +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("handleBrowserExtensionPair", () => { + beforeEach(() => { + resetCapabilityTokenSecretForTests(); + setCapabilityTokenSecretForTests(randomBytes(32)); + }); + + test("rejects non-POST methods with 405", async () => { + const req = buildRequest({ + method: "GET", + body: { + extensionOrigin: "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/", + }, + }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(405); + }); + + test("rejects non-loopback peer with 403", async () => { + const req = buildRequest({ + body: { + extensionOrigin: "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/", + }, + }); + const res = await handleBrowserExtensionPair(req, publicPeerServer); + expect(res.status).toBe(403); + }); + + test("rejects LAN peer (not loopback) with 403", async () => { + const req = buildRequest({ + body: { + extensionOrigin: "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/", + }, + }); + const res = await handleBrowserExtensionPair(req, lanPeerServer); + expect(res.status).toBe(403); + }); + + test("rejects request with non-loopback Host header", async () => { + const req = buildRequest({ + body: { + extensionOrigin: "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/", + }, + host: 
"vellum.example.com", + }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(403); + }); + + test("rejects request with x-forwarded-for header", async () => { + const req = buildRequest({ + body: { + extensionOrigin: "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/", + }, + forwardedFor: "1.2.3.4", + }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(403); + }); + + test("returns 400 when body is missing", async () => { + const req = buildRequest({}); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(400); + }); + + test("returns 400 when body is malformed JSON", async () => { + const req = buildRequest({ rawBody: "{not json" }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(400); + }); + + test("returns 400 when extensionOrigin is missing", async () => { + const req = buildRequest({ body: {} }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(400); + }); + + test("returns 400 when extensionOrigin is not a string", async () => { + const req = buildRequest({ body: { extensionOrigin: 42 } }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(400); + }); + + test("returns 400 when legacy origin field is not a string", async () => { + const req = buildRequest({ body: { origin: 42 } }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(400); + }); + + test("returns 401 when extensionOrigin is not on the allowlist", async () => { + const req = buildRequest({ + body: { extensionOrigin: "chrome-extension://not-allowed/" }, + }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(401); + }); + + test("returns 200 with a valid token for the preferred extensionOrigin field", async () => { + const req = 
buildRequest({ + body: { + extensionOrigin: "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/", + }, + }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(200); + + const payload = (await res.json()) as { + token: string; + expiresAt: string; + guardianId: string; + }; + + expect(typeof payload.token).toBe("string"); + expect(payload.token.length).toBeGreaterThan(0); + + // expiresAt must be an ISO 8601 string (matching what the + // chrome-extension-native-host helper validates) and must be in + // the future. + expect(typeof payload.expiresAt).toBe("string"); + expect(payload.expiresAt).toMatch( + /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$/, + ); + const expiresAtMs = Date.parse(payload.expiresAt); + expect(Number.isNaN(expiresAtMs)).toBe(false); + expect(expiresAtMs).toBeGreaterThan(Date.now()); + + expect(typeof payload.guardianId).toBe("string"); + expect(payload.guardianId.length).toBeGreaterThan(0); + + // Token should round-trip through verifyHostBrowserCapability. + const claims = verifyHostBrowserCapability(payload.token); + expect(claims).not.toBeNull(); + expect(claims?.capability).toBe("host_browser_command"); + expect(claims?.guardianId).toBe(payload.guardianId); + // The numeric claim expiry should match the ISO response field. 
+ expect(claims?.expiresAt).toBe(expiresAtMs); + }); + + test("returns 200 using the legacy `origin` field for backwards compat", async () => { + const req = buildRequest({ + body: { origin: "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/" }, + }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(200); + const payload = (await res.json()) as { + token: string; + expiresAt: string; + }; + expect(typeof payload.token).toBe("string"); + expect(typeof payload.expiresAt).toBe("string"); + }); + + test("prefers extensionOrigin over legacy origin when both are provided", async () => { + // extensionOrigin is on the allowlist, `origin` is not — so the + // request must succeed because we honor `extensionOrigin` first. + const req = buildRequest({ + body: { + extensionOrigin: "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/", + origin: "chrome-extension://not-allowed/", + }, + }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(200); + }); + + test("accepts loopback Host header variants", async () => { + const variants = [ + "localhost:8765", + "127.0.0.1:8765", + "127.0.0.1", + "localhost", + "127.1.2.3:8765", + "[::1]:8765", + "[::1]", + "::1", + ]; + for (const host of variants) { + const req = buildRequest({ + body: { + extensionOrigin: + "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/", + }, + host, + }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(200); + } + }); + + test("rejects malformed bracketed Host header", async () => { + const req = buildRequest({ + body: { + extensionOrigin: "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/", + }, + host: "[::1", // missing closing bracket + }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(403); + }); + + test("rejects bracketed Host header with junk after closing bracket", async () => { + // Defensive against 
`[::1]attacker.com`-style injection: the parser + // used to silently truncate at the first `]` and treat the rest as + // the hostname, which would let an attacker spoof a non-loopback + // host while still passing the loopback Host header check. + const req = buildRequest({ + body: { + extensionOrigin: "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/", + }, + host: "[::1]attacker.com", + }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(403); + }); + + test("rejects non-loopback IPv6 Host header", async () => { + const req = buildRequest({ + body: { + extensionOrigin: "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/", + }, + host: "[2001:db8::1]:8765", + }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(403); + }); + + test("parseHostHeader handles IPv4, IPv6, and bracketed forms", () => { + expect(parseHostHeader("localhost:8765")).toBe("localhost"); + expect(parseHostHeader("127.0.0.1:8765")).toBe("127.0.0.1"); + expect(parseHostHeader("127.0.0.1")).toBe("127.0.0.1"); + expect(parseHostHeader("[::1]:8765")).toBe("::1"); + expect(parseHostHeader("[::1]")).toBe("::1"); + expect(parseHostHeader("::1")).toBe("::1"); + expect(parseHostHeader("[2001:db8::1]:443")).toBe("2001:db8::1"); + expect(parseHostHeader("[::1")).toBeNull(); + expect(parseHostHeader("")).toBeNull(); + // Anything after the closing bracket that isn't an optional ":port" + // must be rejected — otherwise `[::1]attacker.com` would slip past + // the loopback check by parsing as `::1`. 
+ expect(parseHostHeader("[::1]attacker.com")).toBeNull(); + expect(parseHostHeader("[::1]extra")).toBeNull(); + }); + + test("tampered tokens fail verification", async () => { + const req = buildRequest({ + body: { + extensionOrigin: "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/", + }, + }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(200); + + const payload = (await res.json()) as { token: string }; + const originalToken = payload.token; + + // Modify the signature: flip the last character. + const [head, sig] = originalToken.split("."); + const lastChar = sig.slice(-1); + const replacement = lastChar === "A" ? "B" : "A"; + const tamperedToken = `${head}.${sig.slice(0, -1)}${replacement}`; + + expect(verifyHostBrowserCapability(tamperedToken)).toBeNull(); + // The original token should still verify. + expect(verifyHostBrowserCapability(originalToken)).not.toBeNull(); + }); + + test("tokens minted with a different secret fail verification", async () => { + // Mint a token, then swap the secret — verification should fail. + const req = buildRequest({ + body: { + extensionOrigin: "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/", + }, + }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(200); + const payload = (await res.json()) as { token: string }; + + // Swap secret and re-verify. 
+ setCapabilityTokenSecretForTests(randomBytes(32)); + expect(verifyHostBrowserCapability(payload.token)).toBeNull(); + }); + + test("rejects tampered payload even with matching signature length", async () => { + const req = buildRequest({ + body: { + extensionOrigin: "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/", + }, + }); + const res = await handleBrowserExtensionPair(req, loopbackServer); + expect(res.status).toBe(200); + const payload = (await res.json()) as { token: string }; + const [head, sig] = payload.token.split("."); + + // Swap the payload for a different base64url value of equivalent shape. + const bogusPayload = Buffer.from( + JSON.stringify({ + capability: "host_browser_command", + guardianId: "attacker", + nonce: "00".repeat(16), + expiresAt: Date.now() + 60_000, + }), + "utf8", + ) + .toString("base64") + .replace(/\+/g, "-") + .replace(/\//g, "_") + .replace(/=+$/, ""); + + const tampered = `${bogusPayload}.${sig}`; + // Keep `head` referenced so the test reads naturally even though we + // do not use it after tampering. + expect(head).toBeTruthy(); + expect(verifyHostBrowserCapability(tampered)).toBeNull(); + }); +}); diff --git a/assistant/src/runtime/__tests__/chrome-extension-registry.test.ts b/assistant/src/runtime/__tests__/chrome-extension-registry.test.ts new file mode 100644 index 00000000000..5a85071e417 --- /dev/null +++ b/assistant/src/runtime/__tests__/chrome-extension-registry.test.ts @@ -0,0 +1,162 @@ +import { beforeEach, describe, expect, test } from "bun:test"; + +import type { ServerMessage } from "../../daemon/message-protocol.js"; +import { + __resetChromeExtensionRegistryForTests, + type ChromeExtensionConnection, + ChromeExtensionRegistry, + getChromeExtensionRegistry, +} from "../chrome-extension-registry.js"; + +// Minimal structural stand-in for Bun's ServerWebSocket. 
Only the methods +// the registry touches (`send`, `close`) are modeled; the rest of the Bun +// ServerWebSocket API is out of scope for these unit tests. +interface FakeWs { + send: (data: string) => number; + close: (code?: number, reason?: string) => void; + sent: string[]; + closed: { code?: number; reason?: string }[]; + sendShouldThrow?: boolean; +} + +function makeFakeWs(): FakeWs { + const sent: string[] = []; + const closed: { code?: number; reason?: string }[] = []; + const ws: FakeWs = { + sent, + closed, + send(data: string) { + if (ws.sendShouldThrow) { + throw new Error("simulated ws.send failure"); + } + sent.push(data); + return data.length; + }, + close(code?: number, reason?: string) { + closed.push({ code, reason }); + }, + }; + return ws; +} + +function makeConnection( + guardianId: string, + id?: string, +): { conn: ChromeExtensionConnection; fakeWs: FakeWs } { + const fakeWs = makeFakeWs(); + const conn: ChromeExtensionConnection = { + id: id ?? crypto.randomUUID(), + guardianId, + ws: fakeWs as unknown as ChromeExtensionConnection["ws"], + connectedAt: Date.now(), + }; + return { conn, fakeWs }; +} + +describe("ChromeExtensionRegistry", () => { + beforeEach(() => { + __resetChromeExtensionRegistryForTests(); + }); + + test("register stores the connection under the guardianId", () => { + const registry = new ChromeExtensionRegistry(); + const { conn } = makeConnection("guardian-alpha"); + registry.register(conn); + expect(registry.get("guardian-alpha")).toBe(conn); + }); + + test("unregister removes the connection", () => { + const registry = new ChromeExtensionRegistry(); + const { conn } = makeConnection("guardian-alpha"); + registry.register(conn); + registry.unregister(conn.id); + expect(registry.get("guardian-alpha")).toBeUndefined(); + }); + + test("unregister is a no-op when the connectionId is unknown", () => { + const registry = new ChromeExtensionRegistry(); + // Should not throw even though nothing is registered. 
+ expect(() => registry.unregister("unknown-connection")).not.toThrow(); + }); + + test("registering a second connection for the same guardianId closes the prior one", () => { + const registry = new ChromeExtensionRegistry(); + const { conn: conn1, fakeWs: fakeWs1 } = makeConnection( + "guardian-alpha", + "conn-1", + ); + const { conn: conn2 } = makeConnection("guardian-alpha", "conn-2"); + registry.register(conn1); + registry.register(conn2); + // Prior connection should have been closed with code 1000. + expect(fakeWs1.closed).toHaveLength(1); + expect(fakeWs1.closed[0].code).toBe(1000); + // Registry should hold the new connection. + expect(registry.get("guardian-alpha")).toBe(conn2); + }); + + test("registering the same connection id twice is idempotent and does not close itself", () => { + const registry = new ChromeExtensionRegistry(); + const { conn, fakeWs } = makeConnection("guardian-alpha", "conn-1"); + registry.register(conn); + registry.register(conn); + expect(fakeWs.closed).toHaveLength(0); + expect(registry.get("guardian-alpha")).toBe(conn); + }); + + test("send returns false when no connection exists for the guardian", () => { + const registry = new ChromeExtensionRegistry(); + const msg: ServerMessage = { + type: "host_browser_cancel", + requestId: "req-1", + } as ServerMessage; + expect(registry.send("missing-guardian", msg)).toBe(false); + }); + + test("send returns true and forwards the JSON-serialized message when a connection exists", () => { + const registry = new ChromeExtensionRegistry(); + const { conn, fakeWs } = makeConnection("guardian-alpha"); + registry.register(conn); + const msg: ServerMessage = { + type: "host_browser_cancel", + requestId: "req-1", + } as ServerMessage; + const ok = registry.send("guardian-alpha", msg); + expect(ok).toBe(true); + expect(fakeWs.sent).toHaveLength(1); + const parsed = JSON.parse(fakeWs.sent[0]); + expect(parsed.type).toBe("host_browser_cancel"); + expect(parsed.requestId).toBe("req-1"); + }); + + 
test("send returns false when ws.send throws (best-effort delivery)", () => { + const registry = new ChromeExtensionRegistry(); + const { conn, fakeWs } = makeConnection("guardian-alpha"); + fakeWs.sendShouldThrow = true; + registry.register(conn); + const msg: ServerMessage = { + type: "host_browser_cancel", + requestId: "req-1", + } as ServerMessage; + expect(registry.send("guardian-alpha", msg)).toBe(false); + }); + + test("getChromeExtensionRegistry returns a module-level singleton", () => { + const first = getChromeExtensionRegistry(); + const second = getChromeExtensionRegistry(); + expect(first).toBe(second); + }); + + test("unregister after supersession does not remove the new connection", () => { + // When a new connection supersedes an older one, the close handler for + // the older socket will fire later and call unregister with the OLD id. + // That must not clobber the newer registration. + const registry = new ChromeExtensionRegistry(); + const { conn: old } = makeConnection("guardian-alpha", "old-id"); + const { conn: fresh } = makeConnection("guardian-alpha", "fresh-id"); + registry.register(old); + registry.register(fresh); + registry.unregister("old-id"); + expect(registry.get("guardian-alpha")).toBe(fresh); + }); +}); diff --git a/assistant/src/runtime/auth/__tests__/guard-tests.test.ts b/assistant/src/runtime/auth/__tests__/guard-tests.test.ts index 72359cb570b..4a2da2ae02c 100644 --- a/assistant/src/runtime/auth/__tests__/guard-tests.test.ts +++ b/assistant/src/runtime/auth/__tests__/guard-tests.test.ts @@ -63,6 +63,7 @@ describe("route policy coverage", () => { // excluded because they are handled before JWT auth and are not composed // into buildRouteTable(). 
const PRE_AUTH_ROUTE_MODULES = new Set([ + "browser-extension-pair-routes.ts", "guardian-bootstrap-routes.ts", "guardian-refresh-routes.ts", ]); diff --git a/assistant/src/runtime/capability-tokens.ts b/assistant/src/runtime/capability-tokens.ts new file mode 100644 index 00000000000..2d1f8cedba4 --- /dev/null +++ b/assistant/src/runtime/capability-tokens.ts @@ -0,0 +1,382 @@ +/** + * Capability token minting and verification for scoped, short-lived tokens + * issued to the chrome extension (and other thin clients) so they can submit + * results back to the runtime without a full guardian-bound JWT. + * + * Design: + * - Tokens are HMAC-SHA256 signed over a JSON claims payload. + * - Claims include a bound capability, guardian id, nonce, and expiry. + * - Signing uses a long-lived random secret persisted to + * `~/.vellum/protected/` with 0600 permissions. The protected + * directory sits outside the workspace per AGENTS.md: workspace + * directories must not hold security-sensitive material. + * - The secret is generated once on first launch and reused across + * subsequent daemon restarts so previously-minted tokens still verify. + * - Tests inject their own secret via `setCapabilityTokenSecretForTests`. + * + * The encoded token format is `<base64url(claims JSON)>.<base64url(HMAC-SHA256 signature)>`, + * where the signature is computed over the base64url-encoded claims string. + */ + +import { createHmac, randomBytes, timingSafeEqual } from "node:crypto"; +import { + chmodSync, + existsSync, + mkdirSync, + readFileSync, + renameSync, + unlinkSync, + writeFileSync, +} from "node:fs"; +import { homedir } from "node:os"; +import { dirname, join } from "node:path"; + +import { getLogger } from "../util/logger.js"; +import { getDataDir, getProtectedDir } from "../util/platform.js"; + +const log = getLogger("capability-tokens"); + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +/** Capability identifiers that can be bound to a capability token. 
/** Capability identifiers that can be bound to a capability token. */
export type Capability = "host_browser_command";

/** Claims encoded in the signed payload. */
export interface CapabilityClaims {
  capability: Capability;
  guardianId: string;
  /**
   * 16-byte random nonce, hex-encoded. Makes every minted token unique.
   * Verification only checks that it is a non-empty string; nonces are not
   * tracked, so a token remains replayable until `expiresAt`.
   */
  nonce: string;
  /** ms-since-epoch expiry. */
  expiresAt: number;
}

/** A freshly-minted capability token and its absolute expiry. */
export interface CapabilityToken {
  token: string;
  expiresAt: number;
}

// ---------------------------------------------------------------------------
// Secret lifecycle
// ---------------------------------------------------------------------------

/** Required length of the HMAC signing secret, in bytes. */
const SECRET_LENGTH_BYTES = 32;

/** Process-wide cached signing secret; `undefined` until first use/init. */
let _secret: Buffer | undefined;

/**
 * Canonical location of the persisted capability-token secret:
 * `<protected dir>/capability-token-secret`.
 */
function getSecretPath(): string {
  return join(getProtectedDir(), "capability-token-secret");
}

/**
 * Legacy location under the workspace data directory. Only ever read as a
 * migration source so upgrades keep outstanding tokens valid.
 */
function getLegacySecretPath(): string {
  return join(getDataDir(), "capability-token-secret");
}

/**
 * Load the capability-token secret from disk, or create and persist one.
 *
 * Resolution order:
 *   1. A 32-byte file at the protected path is returned as-is.
 *   2. Otherwise a valid legacy workspace secret is migrated and returned.
 *   3. Otherwise a fresh random secret is generated and written atomically.
 *
 * @returns the 32-byte signing secret.
 * @throws if a fresh secret has to be generated and cannot be persisted.
 */
export function loadOrCreateCapabilityTokenSecret(): Buffer {
  const keyPath = getSecretPath();
  if (existsSync(keyPath)) {
    try {
      const raw = readFileSync(keyPath);
      if (raw.length === SECRET_LENGTH_BYTES) {
        return raw;
      }
      log.warn(
        { keyPath, length: raw.length },
        "capability token secret has unexpected length — regenerating",
      );
    } catch (err) {
      log.warn(
        { err, keyPath },
        "Failed to read capability token secret — regenerating",
      );
    }
  }

  // Prefer adopting a legacy secret over minting a new one: a new secret
  // would invalidate every token signed before the upgrade.
  const migrated = migrateLegacyCapabilityTokenSecret();
  if (migrated) {
    return migrated;
  }

  const fresh = randomBytes(SECRET_LENGTH_BYTES);
  writeSecretAtomic(keyPath, fresh);
  log.info("Capability token secret generated and persisted");
  return fresh;
}

/**
 * Write `secret` to `keyPath` atomically (temp file + rename) with mode
 * 0o600, creating the parent directory (mode 0o700) when it is missing.
 *
 * If the write or the rename fails, the temp file is removed before the
 * error is rethrown so no stray copy of the secret is left on disk.
 *
 * @throws whatever `writeFileSync` / `renameSync` / `mkdirSync` threw.
 */
function writeSecretAtomic(keyPath: string, secret: Buffer): void {
  const dir = dirname(keyPath);
  if (!existsSync(dir)) {
    // Owner-only directory: it exists solely to hold sensitive material.
    mkdirSync(dir, { recursive: true, mode: 0o700 });
  }
  const tmpPath = `${keyPath}.tmp.${process.pid}`;
  try {
    writeFileSync(tmpPath, secret, { mode: 0o600 });
    renameSync(tmpPath, keyPath);
  } catch (err) {
    try {
      unlinkSync(tmpPath);
    } catch {
      // Temp file was never created or is already gone — nothing to clean.
    }
    throw err;
  }
  try {
    // `mode` on writeFileSync only applies when the file is created, so
    // tighten explicitly in case a stale temp file pre-existed.
    chmodSync(keyPath, 0o600);
  } catch (err) {
    log.warn(
      { err, keyPath },
      "Failed to chmod capability token secret after write",
    );
  }
}

/**
 * Move a pre-migration secret from the workspace data directory into the
 * protected directory.
 *
 * @returns the migrated secret, or `undefined` when there was nothing to
 *   migrate, the legacy file had the wrong length, or migration failed.
 */
function migrateLegacyCapabilityTokenSecret(): Buffer | undefined {
  const legacyPath = getLegacySecretPath();
  if (!existsSync(legacyPath)) {
    return undefined;
  }
  try {
    const raw = readFileSync(legacyPath);
    if (raw.length !== SECRET_LENGTH_BYTES) {
      log.warn(
        { legacyPath, length: raw.length },
        "legacy capability token secret has unexpected length — ignoring",
      );
      return undefined;
    }
    writeSecretAtomic(getSecretPath(), raw);
    try {
      unlinkSync(legacyPath);
    } catch (err) {
      // The protected copy is already in place, so a leftover legacy file
      // is a hygiene problem, not a functional one.
      log.warn(
        { err, legacyPath },
        "Failed to remove legacy workspace capability token secret after migration",
      );
    }
    log.info(
      { from: legacyPath, to: getSecretPath() },
      "Migrated capability token secret out of workspace into protected directory",
    );
    return raw;
  } catch (err) {
    log.warn(
      { err, legacyPath },
      "Failed to migrate legacy capability token secret — regenerating",
    );
    return undefined;
  }
}

/**
 * Install the module-level secret. Safe to call repeatedly; each call
 * overwrites the cached value.
 *
 * @throws Error when `secret` is not exactly 32 bytes long.
 */
export function initCapabilityTokenSecret(secret: Buffer): void {
  if (secret.length !== SECRET_LENGTH_BYTES) {
    throw new Error(
      `capability token secret must be 32 bytes, got ${secret.length}`,
    );
  }
  _secret = secret;
}

/**
 * Test-only helper to inject a deterministic secret. Performs no length
 * validation.
 */
export function setCapabilityTokenSecretForTests(secret: Buffer): void {
  _secret = secret;
}

/**
 * Test-only helper that drops the cached secret so the next use resolves
 * it again.
 */
export function resetCapabilityTokenSecretForTests(): void {
  _secret = undefined;
}

/**
 * Return the cached secret, resolving it on first use: a throwaway random
 * secret when `NODE_ENV` is `"test"`, otherwise the persisted secret.
 */
function getSecret(): Buffer {
  if (_secret) return _secret;
  if (process.env.NODE_ENV === "test") {
    _secret = randomBytes(SECRET_LENGTH_BYTES);
    return _secret;
  }
  // Lazy fallback so call sites do not depend on daemon startup ordering.
  _secret = loadOrCreateCapabilityTokenSecret();
  return _secret;
}

// ---------------------------------------------------------------------------
// Mint / verify
// ---------------------------------------------------------------------------

const DEFAULT_TTL_MS = 30 * 60 * 1000; // 30 minutes

/** Encode bytes as unpadded, URL-safe base64. */
function base64urlEncode(buf: Buffer): string {
  return buf.toString("base64url");
}

/** Decode unpadded (or padded) URL-safe base64 into bytes. */
function base64urlDecode(s: string): Buffer {
  return Buffer.from(s, "base64url");
}

/** HMAC-SHA256 of `payload` under `secret`, base64url-encoded. */
function sign(payload: string, secret: Buffer): string {
  return base64urlEncode(createHmac("sha256", secret).update(payload).digest());
}

/**
 * Mint a capability token bound to the `host_browser_command` capability
 * for the given guardian id.
 *
 * @param guardianId guardian the token is bound to.
 * @param ttlMs lifetime in milliseconds (default 30 minutes).
 * @returns the encoded `payload.signature` token and its absolute expiry.
 */
export function mintHostBrowserCapability(
  guardianId: string,
  ttlMs: number = DEFAULT_TTL_MS,
): CapabilityToken {
  const expiresAt = Date.now() + ttlMs;
  const claims: CapabilityClaims = {
    capability: "host_browser_command",
    guardianId,
    nonce: randomBytes(16).toString("hex"),
    expiresAt,
  };
  const payload = base64urlEncode(Buffer.from(JSON.stringify(claims), "utf8"));
  const sig = sign(payload, getSecret());
  return { token: `${payload}.${sig}`, expiresAt };
}

/**
 * Verify a capability token.
 *
 * @returns the decoded claims, or `null` when the token is not a string,
 *   lacks the `payload.signature` shape, carries a bad signature, has a
 *   malformed payload, is bound to a different capability, or has expired.
 *
 * The signature is checked with `timingSafeEqual` before the payload is
 * parsed, so unauthenticated input never reaches `JSON.parse`.
 */
export function verifyHostBrowserCapability(
  token: string,
): CapabilityClaims | null {
  if (typeof token !== "string") return null;
  const dot = token.indexOf(".");
  if (dot < 0) return null;
  const payload = token.slice(0, dot);
  const sig = token.slice(dot + 1);
  if (!payload || !sig) return null;

  const expected = sign(payload, getSecret());
  const a = Buffer.from(sig, "utf8");
  const b = Buffer.from(expected, "utf8");
  // timingSafeEqual throws on length mismatch, so gate on length first.
  if (a.length !== b.length) return null;
  if (!timingSafeEqual(a, b)) return null;

  let claims: CapabilityClaims;
  try {
    claims = JSON.parse(
      base64urlDecode(payload).toString("utf8"),
    ) as CapabilityClaims;
  } catch {
    return null;
  }

  if (!claims || typeof claims !== "object") return null;
  if (claims.capability !== "host_browser_command") return null;
  if (typeof claims.guardianId !== "string" || claims.guardianId.length === 0) {
    return null;
  }
  if (typeof claims.nonce !== "string" || claims.nonce.length === 0) {
    return null;
  }
  // JSON can carry out-of-range literals (e.g. 1e999) that parse to
  // Infinity; a non-finite expiry would never expire.
  if (
    typeof claims.expiresAt !== "number" ||
    !Number.isFinite(claims.expiresAt) ||
    claims.expiresAt <= Date.now()
  ) {
    return null;
  }
  return claims;
}
// ---------------------------------------------------------------------------
// Dev-only fallback token file
// ---------------------------------------------------------------------------

/**
 * Path to the dev-pairing fallback token file: `~/.vellum/daemon-token`.
 *
 * Resolved from the user's home directory rather than the configurable
 * workspace directory so the location stays fixed regardless of workspace
 * overrides.
 */
export function getDaemonTokenFilePath(): string {
  return join(homedir(), ".vellum", "daemon-token");
}

/**
 * Mint a capability token for `guardianId` and write it to the daemon-token
 * file with 0600 permissions (temp file + rename).
 *
 * Best-effort: every failure is logged and swallowed so this never blocks
 * the caller. If the write or the rename fails, the temp file — which holds
 * a live token — is removed before the failure is reported.
 */
export function writeDaemonTokenFallback(guardianId: string): void {
  try {
    const { token } = mintHostBrowserCapability(guardianId);
    const filePath = getDaemonTokenFilePath();
    const dir = dirname(filePath);
    if (!existsSync(dir)) {
      mkdirSync(dir, { recursive: true });
    }
    const tmpPath = `${filePath}.tmp.${process.pid}`;
    try {
      writeFileSync(tmpPath, token, { mode: 0o600 });
      renameSync(tmpPath, filePath);
    } catch (err) {
      try {
        unlinkSync(tmpPath);
      } catch {
        // Temp file was never created or is already gone.
      }
      throw err;
    }
    try {
      chmodSync(filePath, 0o600);
    } catch {
      // best-effort
    }
    log.info({ filePath }, "Dev capability token written to daemon-token file");
  } catch (err) {
    log.warn(
      { err },
      "Failed to write dev capability token file; manual pairing still available via /v1/browser-extension-pair",
    );
  }
}
export interface ChromeExtensionConnection {
  /** Stable identifier for this WebSocket connection (used for unregister). */
  id: string;
  /** Guardian identity this connection is authenticated as. */
  guardianId: string;
  /** Underlying Bun WebSocket. */
  ws: ServerWebSocket<unknown>;
  /** Wall-clock timestamp (ms) when the connection was registered. */
  connectedAt: number;
}

/**
 * Registry of active chrome-extension connections keyed by guardianId.
 * Holds at most one connection per guardian — registering a different
 * connection for the same guardian closes and replaces the prior one.
 */
export class ChromeExtensionRegistry {
  private readonly byGuardian = new Map<string, ChromeExtensionConnection>();

  /**
   * Register a connection for its guardian. A prior connection with a
   * different id is closed (code 1000) and replaced; re-registering the
   * same connection id is idempotent and does not close anything.
   */
  register(conn: ChromeExtensionConnection): void {
    const prior = this.byGuardian.get(conn.guardianId);
    if (prior && prior.id !== conn.id) {
      try {
        prior.ws.close(1000, "superseded by new connection");
      } catch {
        // Best-effort — the prior socket may already be closed.
      }
    }
    this.byGuardian.set(conn.guardianId, conn);
    log.info(
      { guardianId: conn.guardianId, connectionId: conn.id },
      "chrome extension registered",
    );
  }

  /**
   * Remove the entry whose connection id matches. No-op when no such entry
   * exists — in particular when the connection was already superseded, so
   * a late close of the old socket cannot evict the newer registration.
   */
  unregister(connectionId: string): void {
    for (const [guardianId, conn] of this.byGuardian) {
      if (conn.id !== connectionId) continue;
      this.byGuardian.delete(guardianId);
      log.info(
        { guardianId, connectionId },
        "chrome extension unregistered",
      );
      return;
    }
  }

  /** Return the active connection for a guardian, if any. */
  get(guardianId: string): ChromeExtensionConnection | undefined {
    return this.byGuardian.get(guardianId);
  }

  /**
   * Send a ServerMessage (JSON-serialized) to the guardian's connection.
   *
   * @returns `true` when a connection exists and the frame was sent or
   *   queued; `false` when no connection is registered, when `ws.send`
   *   throws, or when `ws.send` reports the frame was dropped.
   */
  send(guardianId: string, msg: ServerMessage): boolean {
    const conn = this.byGuardian.get(guardianId);
    if (!conn) return false;
    try {
      // Bun's ws.send returns a status: 0 = dropped (connection issue),
      // -1 = queued under backpressure, >0 = bytes sent. Only 0 is a
      // delivery failure.
      const status: unknown = conn.ws.send(JSON.stringify(msg));
      if (status === 0) {
        log.warn(
          { guardianId, connectionId: conn.id },
          "chrome extension send dropped by socket",
        );
        return false;
      }
      return true;
    } catch (err) {
      log.warn({ guardianId, err }, "failed to send to chrome extension");
      return false;
    }
  }
}

// ── Module-level singleton (same pattern as assistant-event-hub) ──────────
let instance: ChromeExtensionRegistry | null = null;

/** Return the process-wide registry, creating it on first use. */
export function getChromeExtensionRegistry(): ChromeExtensionRegistry {
  if (!instance) instance = new ChromeExtensionRegistry();
  return instance;
}

/**
 * Test helper: drop the module-level singleton so the next
 * `getChromeExtensionRegistry()` call builds a fresh registry.
 */
export function __resetChromeExtensionRegistryForTests(): void {
  instance = null;
}
"./routes/brain-graph-routes.js"; +import { handleBrowserExtensionPair } from "./routes/browser-extension-pair-routes.js"; import { btwRouteDefinitions } from "./routes/btw-routes.js"; import { callRouteDefinitions } from "./routes/call-routes.js"; import { @@ -333,6 +336,19 @@ export class RuntimeHttpServer { extensionRelayServer.handleOpen( ws as ServerWebSocket, ); + // When the JWT sub resolved to a guardian principal at upgrade + // time, also register this connection with the chrome-extension + // registry so host_browser_request frames can be routed to it. + // The legacy ExtensionCommand protocol handled by + // extensionRelayServer continues to work in parallel. + if (data.guardianId) { + getChromeExtensionRegistry().register({ + id: data.connectionId, + guardianId: data.guardianId, + ws, + connectedAt: Date.now(), + }); + } return; } const callSessionId = (data as RelayWebSocketData).callSessionId; @@ -372,6 +388,12 @@ export class RuntimeHttpServer { code, reason?.toString(), ); + // Always attempt to unregister — the registry uses connectionId + // as the key and no-ops if the entry is absent (e.g. when the + // connection was never registered because guardianId was + // undefined, or when it was superseded by a newer registration + // for the same guardian). + getChromeExtensionRegistry().unregister(data.connectionId); return; } const callSessionId = (data as RelayWebSocketData).callSessionId; @@ -533,6 +555,13 @@ export class RuntimeHttpServer { return handlePairingStatus(url, this.pairingContext); } + // Chrome extension capability-token pair endpoint — unauthenticated but + // restricted to loopback peers + an extension-id allowlist. Used by the + // native messaging helper to bootstrap a scoped token. 
+ if (path === "/v1/browser-extension-pair") { + return await handleBrowserExtensionPair(req, server); + } + // Guardian bootstrap and refresh endpoints — before JWT auth because // bootstrap is the first endpoint called to obtain a JWT, and refresh // needs to work when the access token is expired. Bootstrap has its @@ -635,6 +664,24 @@ export class RuntimeHttpServer { ); } + // When auth is enabled we parse the JWT sub to extract the actor + // principal ID, which we use as the guardianId key for the + // ChromeExtensionRegistry. When auth is disabled (dev bypass), + // guardianId remains undefined and the registration is skipped — + // host_browser_request routing requires an authenticated guardian. + // + // Gateway path: when the WebSocket upgrade is proxied through the + // gateway, the upstream token minted by `mintServiceToken()` has + // `sub=svc:gateway:self` with no actor principal id. In that case + // we fall back to an explicit `x-guardian-id` header / query param + // so the runtime can still register the connection under the real + // guardian. TODO(gateway-plumbing): the gateway's + // `browser-relay-websocket.ts` does not yet forward this header — + // once it does (resolving the actor from the downstream edge token + // at upgrade time), the service-token branch below will start + // picking up the guardianId. Until then, cloud-path registration + // silently no-ops, which is a known limitation tracked for Phase 3. + let guardianId: string | undefined; if (!isHttpAuthDisabled()) { const wsUrl = new URL(req.url); const token = wsUrl.searchParams.get("token"); @@ -645,6 +692,23 @@ export class RuntimeHttpServer { if (!jwtResult.ok) { return httpError("UNAUTHORIZED", "Unauthorized", 401); } + const subResult = parseSub(jwtResult.claims.sub); + if (subResult.ok && subResult.actorPrincipalId) { + // Direct actor principal — this is the loopback / desktop path. 
+ guardianId = subResult.actorPrincipalId; + } else { + // Service-token path (gateway-forwarded). Look for an explicit + // guardian id plumbed by the gateway as a header or query + // param. Header takes precedence because headers are easier + // for the gateway to forward without rewriting the URL. + const headerGuardianId = req.headers.get("x-guardian-id")?.trim() ?? ""; + const queryGuardianId = + wsUrl.searchParams.get("guardianId")?.trim() ?? ""; + const fallbackGuardianId = headerGuardianId || queryGuardianId; + if (fallbackGuardianId) { + guardianId = fallbackGuardianId; + } + } } const connectionId = crypto.randomUUID(); @@ -652,6 +716,7 @@ export class RuntimeHttpServer { data: { wsType: "browser-relay", connectionId, + guardianId, } satisfies BrowserRelayWebSocketData, }); if (!upgraded) { diff --git a/assistant/src/runtime/routes/browser-extension-pair-routes.ts b/assistant/src/runtime/routes/browser-extension-pair-routes.ts new file mode 100644 index 00000000000..64f506e1349 --- /dev/null +++ b/assistant/src/runtime/routes/browser-extension-pair-routes.ts @@ -0,0 +1,226 @@ +/** + * Route handler for `POST /v1/browser-extension-pair`. + * + * Mints a short-lived, scoped `host_browser_command` capability token for a + * chrome extension that has proved (via the native messaging helper) it is + * running locally with an allowlisted extension id. + * + * Security properties: + * - **Localhost-only**: enforced by both the TCP peer IP (via + * `server.requestIP`) and the `Host` header. Non-localhost callers + * receive a 403. + * - **Origin allowlist**: the body must include `extensionOrigin` + * matching a hard-coded allowlist of known Vellum chrome extension + * ids. This is a minimal check; real enforcement happens in the + * native messaging helper which vets the extension id that spawned + * it (PR 7). + * - **No auth header required**: the native messaging bootstrap flow + * runs before the extension has any token. 
/** Minimal server shape needed to look up a request's peer address. */
export type PairServerContext = {
  requestIP(
    req: Request,
  ): { address: string; family: string; port: number } | null;
};

/**
 * Hard-coded allowlist of chrome extension origins permitted to request a
 * capability token. Entries are full origins including the trailing slash
 * and are matched by exact string equality.
 *
 * Must stay in sync with the allowlist used by the native messaging helper
 * (`ALLOWED_EXTENSION_IDS`) for the dev pair flow to work end-to-end.
 */
export const ALLOWED_EXTENSION_ORIGINS: ReadonlySet<string> = new Set<string>([
  // Dev placeholder — replaced when the unpacked extension is loaded locally.
  // TODO: production chrome extension id before release
  "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/",
]);
/** A `Host` port is `*DIGIT` (RFC 3986), so empty or all-digits is valid. */
const HOST_PORT_PATTERN = /^\d*$/;

/**
 * Parse an HTTP `Host` header value and extract the hostname portion.
 *
 * Accepted shapes: bracketed IPv6 with optional port (`[::1]`,
 * `[::1]:8765`), unbracketed IPv6 (`::1`, returned whole), `host:port`
 * (`localhost:8765`) and bare hostnames (`localhost`).
 *
 * @returns the hostname, or `null` when the header is malformed: empty,
 *   missing the closing bracket, trailing content after `]` that is not
 *   `:port`, or a port that is not made of digits. The port check closes
 *   the same hole as the bracket check — `localhost:attacker.com` must not
 *   parse as `localhost`.
 *
 * Exported for testing.
 */
export function parseHostHeader(raw: string): string | null {
  if (raw.length === 0) return null;

  // IPv6 literal in brackets, e.g. `[::1]` or `[::1]:8765`.
  if (raw.startsWith("[")) {
    const end = raw.indexOf("]");
    if (end < 0) return null;
    const after = raw.substring(end + 1);
    if (after.length > 0) {
      if (!after.startsWith(":")) return null;
      if (!HOST_PORT_PATTERN.test(after.substring(1))) return null;
    }
    return raw.substring(1, end);
  }

  const firstColon = raw.indexOf(":");
  if (firstColon < 0) return raw;

  // Multiple colons and no brackets — assume unbracketed IPv6, which
  // cannot carry a port, and return it whole.
  if (raw.indexOf(":", firstColon + 1) >= 0) return raw;

  // Exactly one colon: `host:port`.
  if (!HOST_PORT_PATTERN.test(raw.substring(firstColon + 1))) return null;
  return raw.substring(0, firstColon);
}

/**
 * Returns true if the Host header points at a loopback address:
 * `localhost`, `::1`, or any dotted-quad in 127.0.0.0/8 (case-insensitive).
 *
 * A missing (null or empty) Host header is accepted because some HTTP
 * clients omit it; a malformed header is rejected.
 */
function isLoopbackHostHeader(host: string | null): boolean {
  if (!host) return true;
  const parsed = parseHostHeader(host);
  if (parsed === null) return false;

  const hostname = parsed.toLowerCase();
  if (hostname === "localhost" || hostname === "::1") return true;
  if (!hostname.startsWith("127.")) return false;

  // 127.0.0.0/8: exactly four decimal octets, each at most 255.
  const octets = hostname.split(".");
  return (
    octets.length === 4 &&
    octets.every((octet) => /^\d+$/.test(octet) && Number(octet) <= 255)
  );
}
/**
 * Resolve the guardian id to bind the capability token to: the principal
 * id of the guardian found for the `"vellum"` channel, or the literal
 * string `"local"` when none is found or the lookup throws.
 */
function resolveLocalGuardianId(): string {
  try {
    const result = findGuardianForChannel("vellum");
    if (result?.contact.principalId) {
      return result.contact.principalId;
    }
  } catch (err) {
    log.warn(
      { err },
      "Failed to look up local vellum guardian; falling back to 'local'",
    );
  }
  return "local";
}

/**
 * Pull the extension origin out of a parsed request body. Prefers
 * `extensionOrigin` and falls back to the legacy `origin` field; either
 * must be a non-empty string.
 */
function readExtensionOrigin(body: object): string | null {
  const { extensionOrigin, origin } = body as {
    extensionOrigin?: unknown;
    origin?: unknown;
  };
  for (const candidate of [extensionOrigin, origin]) {
    if (typeof candidate === "string" && candidate.length > 0) {
      return candidate;
    }
  }
  return null;
}

/**
 * Handle POST /v1/browser-extension-pair.
 *
 * Checks run in this order, and the first failure wins:
 *   - 405 unless the method is POST;
 *   - 403 unless the TCP peer is loopback;
 *   - 403 unless the Host header is loopback (or absent);
 *   - 403 if any `x-forwarded-for` header is present;
 *   - 400 on a non-JSON body, a non-object body, or a missing origin;
 *   - 401 when the origin is not in `ALLOWED_EXTENSION_ORIGINS`.
 *
 * Body: `{ extensionOrigin: string }` (legacy `{ origin: string }` is also
 * accepted).
 * Returns: `{ token, expiresAt, guardianId }` where `expiresAt` is an
 * ISO 8601 timestamp string.
 */
export async function handleBrowserExtensionPair(
  req: Request,
  server: PairServerContext,
): Promise<Response> {
  if (req.method !== "POST") {
    return new Response("method not allowed", {
      status: 405,
      headers: { Allow: "POST" },
    });
  }

  // Enforce localhost-only via peer IP.
  const peerIp = server.requestIP(req)?.address ?? "";
  if (!peerIp || !isLoopbackAddress(peerIp)) {
    log.warn({ peerIp }, "Rejecting browser-extension-pair from non-loopback");
    return httpError("FORBIDDEN", "endpoint is local-only", 403);
  }

  // Secondary check: Host header. Covers requests relayed by a local proxy,
  // where the peer address is loopback but the original target was not.
  const host = req.headers.get("host");
  if (!isLoopbackHostHeader(host)) {
    log.warn(
      { host },
      "Rejecting browser-extension-pair with non-loopback Host header",
    );
    return httpError("FORBIDDEN", "endpoint is local-only", 403);
  }

  // Any `x-forwarded-for` header means the request went through a proxy;
  // the pair endpoint is strictly machine-local.
  if (req.headers.get("x-forwarded-for")) {
    return httpError("FORBIDDEN", "endpoint is local-only", 403);
  }

  let body: unknown;
  try {
    body = await req.json();
  } catch {
    return httpError("BAD_REQUEST", "invalid JSON body", 400);
  }
  if (!body || typeof body !== "object") {
    return httpError("BAD_REQUEST", "body must be an object", 400);
  }

  const extensionOrigin = readExtensionOrigin(body);
  if (extensionOrigin === null) {
    return httpError("BAD_REQUEST", "extensionOrigin is required", 400);
  }

  if (!ALLOWED_EXTENSION_ORIGINS.has(extensionOrigin)) {
    log.warn(
      { extensionOrigin },
      "Rejecting browser-extension-pair for disallowed origin",
    );
    return httpError("UNAUTHORIZED", "unauthorized origin", 401);
  }

  const guardianId = resolveLocalGuardianId();
  const { token, expiresAt } = mintHostBrowserCapability(guardianId);
  const expiresAtIso = new Date(expiresAt).toISOString();

  log.info(
    { extensionOrigin, guardianId, expiresAt: expiresAtIso },
    "Issued chrome extension capability token",
  );

  return Response.json({ token, expiresAt: expiresAtIso, guardianId });
}
"../assistant-scope.js"; import type { AuthContext } from "../auth/types.js"; +import { getChromeExtensionRegistry } from "../chrome-extension-registry.js"; import { bridgeConfirmationRequestToGuardian } from "../confirmation-request-guardian-bridge.js"; import { routeGuardianReply } from "../guardian-reply-router.js"; import { healGuardianBindingDrift } from "../guardian-vellum-migration.js"; @@ -1146,12 +1147,13 @@ export async function handleSendMessage( conversation, ); const isInteractive = isInteractiveInterface(sourceInterface); - // Only create the host bash proxy for desktop client interfaces that can - // execute commands on the user's machine. Non-desktop conversations (CLI, - // channels, headless) fall back to local execution. + // Only create each host proxy for interfaces that support the matching + // capability. macOS supports all four; the chrome-extension interface only + // supports host_browser. Non-desktop conversations (CLI, channels, headless) + // fall back to local execution. // Set the proxy BEFORE updateClient so updateClient's call to // hostBashProxy.updateSender targets the correct (new) proxy. - if (supportsHostProxy(sourceInterface)) { + if (supportsHostProxy(sourceInterface, "host_bash")) { // Reuse the existing proxy if the conversation is actively processing a // host bash request to avoid orphaning in-flight requests. if (!conversation.isProcessing() || !conversation.hostBashProxy) { @@ -1160,18 +1162,72 @@ export async function handleSendMessage( }); conversation.setHostBashProxy(proxy); } + } else if (!conversation.isProcessing()) { + conversation.setHostBashProxy(undefined); + } + // For the chrome-extension interface we route host_browser_request / + // host_browser_cancel frames through the in-process ChromeExtensionRegistry + // to the WebSocket opened against /v1/browser-relay by the connected + // extension, instead of the SSE/onEvent hub used by macOS. 
The registry + // lookup is keyed by the JWT-derived actor principal id, which the + // runtime captured at WebSocket upgrade time. + // + // macOS (and any other interface that supports host_browser in the + // future via the SSE hub) keeps using `onEvent` — see the else branch. + const browserProxySendToClient: (msg: ServerMessage) => void = + sourceInterface === "chrome-extension" + ? (msg) => { + const gid = authContext.actorPrincipalId; + if (!gid) { + // No guardian identity on this turn — nothing to route to. + // The proxy will observe this via its try/catch and surface a + // transport error back to the caller. + throw new Error( + "chrome-extension host_browser send skipped: no guardianId on AuthContext", + ); + } + const ok = getChromeExtensionRegistry().send(gid, msg); + if (!ok) { + throw new Error( + `chrome-extension host_browser send failed: no active connection for guardian ${gid}`, + ); + } + } + : onEvent; + // Stash the registry-routed sender on the conversation so queue-drain + // restores (which run outside of conversation-routes.ts and only have + // access to `sendToClient`) can preserve it when calling + // `restoreBrowserProxyAvailability()`. For non-chrome-extension + // interfaces the override is cleared so the SSE hub sender is used. 
+ if (sourceInterface === "chrome-extension") { + conversation.hostBrowserSenderOverride = browserProxySendToClient; + } else { + conversation.hostBrowserSenderOverride = undefined; + } + if (supportsHostProxy(sourceInterface, "host_browser")) { if (!conversation.isProcessing() || !conversation.hostBrowserProxy) { - const browserProxy = new HostBrowserProxy(onEvent, (requestId) => { - pendingInteractions.resolve(requestId); - }); + const browserProxy = new HostBrowserProxy( + browserProxySendToClient, + (requestId) => { + pendingInteractions.resolve(requestId); + }, + ); conversation.setHostBrowserProxy(browserProxy); } + } else if (!conversation.isProcessing()) { + conversation.setHostBrowserProxy(undefined); + } + if (supportsHostProxy(sourceInterface, "host_file")) { if (!conversation.isProcessing() || !conversation.hostFileProxy) { const fileProxy = new HostFileProxy(onEvent, (requestId) => { pendingInteractions.resolve(requestId); }); conversation.setHostFileProxy(fileProxy); } + } else if (!conversation.isProcessing()) { + conversation.setHostFileProxy(undefined); + } + if (supportsHostProxy(sourceInterface, "host_cu")) { if (!conversation.isProcessing() || !conversation.hostCuProxy) { const cuProxy = new HostCuProxy(onEvent, (requestId) => { pendingInteractions.resolve(requestId); @@ -1185,20 +1241,41 @@ export async function handleSendMessage( conversation.addPreactivatedSkillId("computer-use"); } } else if (!conversation.isProcessing()) { - conversation.setHostBashProxy(undefined); - conversation.setHostBrowserProxy(undefined); - conversation.setHostFileProxy(undefined); conversation.setHostCuProxy(undefined); } // Wire sendToClient to the SSE hub so all subsystems can reach the HTTP client. // Called after setHostBashProxy so updateSender targets the current proxy. // When proxies are preserved during an active turn (non-desktop request while - // processing), skip updating proxy senders to avoid degrading them. 
+ // processing), skip updating proxy senders to avoid degrading them. The gate + // matches the host_bash capability because the legacy "reject send during + // host bash" flow is what this is really protecting. const preservingProxies = - conversation.isProcessing() && !supportsHostProxy(sourceInterface); + conversation.isProcessing() && + !supportsHostProxy(sourceInterface, "host_bash"); + // hasNoClient must remain `!isInteractive` so downstream tool gating + // (`isToolActiveForContext` for HOST_TOOL_NAMES, `createToolExecutor`'s + // `isInteractive: !ctx.hasNoClient`) keeps host_bash/host_file/host_cu + // tools gated for non-desktop interfaces. The chrome-extension interface + // is non-interactive (no SSE prompter UI) but still has a connected client + // that can service host_browser_request events; we restore that single + // proxy explicitly below without relaxing `hasNoClient`. conversation.updateClient(onEvent, !isInteractive, { skipProxySenderUpdate: preservingProxies, }); + // For non-interactive interfaces that DO support host_browser + // (chrome-extension), explicitly re-enable just the browser proxy. The + // helper bypasses the `hasNoClient` gate so the single-capability + // chrome-extension turn can drive the browser via CDP without leaking + // host_bash/host_file tool availability into tool gating. + // + // `restoreBrowserProxyAvailability()` reads `hostBrowserSenderOverride` + // (set above for chrome-extension) and applies the registry-routed + // sender, so the chrome-extension path gets the correct sender here + // — including after queue-drain restores run from conversation-process.ts, + // which only have access to the conversation instance. 
+ if (supportsHostProxy(sourceInterface, "host_browser")) { + conversation.restoreBrowserProxyAvailability?.(); + } // ── Canned first-greeting fast path ── // On a completely fresh workspace, skip LLM inference for the macOS diff --git a/clients/chrome-extension-native-host/README.md b/clients/chrome-extension-native-host/README.md new file mode 100644 index 00000000000..9c9a1351d25 --- /dev/null +++ b/clients/chrome-extension-native-host/README.md @@ -0,0 +1,256 @@ +# @vellum/chrome-extension-native-host + +A tiny Node CLI binary that bridges the Vellum Chrome extension and the +locally-running Vellum assistant via [Chrome Native Messaging][cnm]. It +exists so the extension can bootstrap a scoped capability token for the +self-hosted (local-assistant) transport without ever shipping a long-lived +secret in the extension package itself. + +The macOS installer wiring landed in PR 12: the helper is now bundled +into the Mac `.app` under `Contents/MacOS/vellum-chrome-native-host` +via `clients/macos/build.sh` (see the "Bundling into the macOS app" +section below), and `NativeMessagingInstaller` writes the +`com.vellum.daemon.json` manifest into Chrome's per-user +`NativeMessagingHosts` directory at launch time. The extension-side +bootstrap flow that actually spawns this helper lands in PR 13, and the +runtime HTTP endpoint it talks to (`/v1/browser-extension-pair`) lands +in PR 11. + +[cnm]: https://developer.chrome.com/docs/extensions/develop/concepts/native-messaging + +## Why a separate binary? + +Chrome only allows extensions to talk to native code through the native +messaging protocol: a stdio pipe with a 4-byte little-endian length prefix +and a UTF-8 JSON body. The host binary is registered with Chrome via a +JSON manifest installed in a well-known location, and Chrome enforces an +allowlist of extension IDs in that manifest before it will spawn the +binary at all. 
+ +Keeping the helper as its own package gives us: + +- A clean security boundary: the helper has zero imports from the + extension or the assistant, and verifies the calling extension + ID against a hard-coded allowlist before doing any work. +- A small, auditable surface: ~200 lines of TypeScript that compile to a + single Node CLI entry point. +- Simple distribution: the macOS installer (PR 12) just drops the + compiled `dist/index.js` and a manifest into `~/Library/Application + Support/Google/Chrome/NativeMessagingHosts/`. + +## Stdio contract + +The helper speaks a single request/response exchange per spawn. Chrome +re-spawns it on every `chrome.runtime.connectNative("com.vellum.daemon")` +call, so there is no long-lived session state. + +### Frame format + +Every message in either direction is framed as: + +``` ++--------+----------------------------+ +| u32 LE | UTF-8 JSON (len bytes) | +| length | | ++--------+----------------------------+ +``` + +`encodeFrame(payload)` and `decodeFrames(buf)` in `src/protocol.ts` are +the canonical implementations. They are pure functions — no I/O — and are +covered by `src/__tests__/protocol.test.ts`. + +### Messages + +The extension sends: + +```json +{ "type": "request_token" } +``` + +On success, the helper writes: + +```json +{ + "type": "token_response", + "token": "<capability token>", + "expiresAt": "<ISO 8601 timestamp>", + "guardianId": "<guardian id>" +} +``` + +On any failure, the helper writes: + +```json +{ "type": "error", "message": "<reason>" } +``` + +…and exits with a non-zero status code. Possible `message` values +include `unauthorized_origin`, `unsupported_frame_type`, +`unexpected_additional_frame`, `protocol_error: malformed_frame_json: …` +(returned when stdin contains a frame whose body is not valid JSON), and +any error string surfaced by the underlying `fetch` to the assistant +(`failed to reach assistant at …`, `assistant pair request failed with +HTTP 503`, etc.).
Per the project-wide terminology rule in `AGENTS.md`, +all user-visible strings refer to the local process as the "assistant". + +## Origin allowlist + +Chrome appends the calling extension's origin (e.g. +`chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/`) as the first +positional argument when launching the host. The helper parses this, +extracts the bare extension ID, and rejects anything not in +`ALLOWED_EXTENSION_IDS` in `src/index.ts` before reading any stdin +bytes. + +Today the allowlist contains a single dev placeholder ID. The production +ID will be added before release — see the `// TODO: production id before +release` comment in `src/index.ts`. + +## Assistant port resolution + +The helper looks up the assistant's HTTP port using the following +precedence (highest first): + +1. **`--assistant-port <port>`** CLI flag — accepts either + `--assistant-port 7822` or `--assistant-port=7822`. This exists so a + wrapper script registered in Chrome's `NativeMessagingHosts` manifest + can pin the helper to a known port for non-default installs (e.g. + named local instances spawned by `cli/src/lib/local.ts` which set + `RUNTIME_HTTP_PORT` from `resources.daemonPort`). +2. **`~/.vellum/runtime-port`** lockfile — a single integer written by + the assistant on startup. *Note: this lockfile is not yet written by + the assistant — see the TODO below.* Once it is, default installs + will not need any manifest-side configuration. +3. **`7821`** — the well-known default port. + +If a step fails (file missing, parse error, etc.), resolution falls +through to the next step. The subsequent HTTP request will surface a +clear connection error if the assistant isn't actually listening on the +resolved port. + +> **TODO (follow-up):** Have the assistant write its active HTTP port +> to `~/.vellum/runtime-port` on startup so the lockfile branch above +> starts working without requiring `--assistant-port`.
This was +> intentionally left out of the scaffold PR (PR 7) to keep the change +> surface small. Until then, multi-instance installs should rely on the +> CLI flag via a wrapper script in the native messaging manifest. + +## Building + +```bash +cd clients/chrome-extension-native-host +bun install +bun run build # produces dist/index.js +``` + +`bun run build` is a thin wrapper around `tsc -p tsconfig.json`. The +output is a single ES module file under `dist/` that can be invoked +directly with `node dist/index.js`. This form is convenient for local +development and unit tests. + +## Bundling into the macOS app + +The production macOS `.app` does **not** ship the `dist/index.js` form +— Chrome's native messaging `path` field must point at a runnable +executable, and we do not want to assume that the user has `node` on +their `$PATH`. Instead, `clients/macos/build.sh` uses +`bun build --compile` (via its shared `build_bun_binary` helper) to +produce a self-contained single-file binary named +`vellum-chrome-native-host`, writes it to +`$SCRIPT_DIR/native-host-bin/`, and then copies it into the app bundle +at `Contents/MacOS/vellum-chrome-native-host` alongside the other +compiled Bun binaries (`vellum-daemon`, `vellum-cli`, +`vellum-gateway`, `vellum-assistant`). + +At first launch, the Swift-side `NativeMessagingInstaller` (see +`clients/macos/vellum-assistant/Features/Installer/NativeMessagingInstaller.swift`) +resolves the bundled binary via +`Bundle.main.url(forAuxiliaryExecutable: "vellum-chrome-native-host")` +and writes `com.vellum.daemon.json` pointing `path` at that absolute +location. Because the manifest is regenerated on every launch, moving +or upgrading the `.app` bundle automatically repoints Chrome at the +new helper location without a manual re-pair step. + +### Why Bun single-file compile (not `node dist/index.js`) + +The plan initially considered shipping `dist/index.js` plus a wrapper +shell script. That approach was dropped because: + +1. 
Chrome's native messaging host `path` must be executable — it does + not support shell interpretation of script shebangs beyond the OS's + own `execve`, which means we would still need a compiled wrapper. +2. Every other binary the macOS app ships (daemon, CLI, gateway) uses + `bun build --compile` into a native single-file binary. Reusing + that same pipeline keeps the build/signing steps uniform and avoids + depending on the user having `node` installed. +3. The compiled binary participates in the app's codesign chain and + notarization pipeline the same way as the other helpers, which + keeps macOS Gatekeeper happy. + +### Manifest template + +The canonical manifest shape is checked in at +`clients/chrome-extension-native-host/com.vellum.daemon.json.template`. +`NativeMessagingInstaller` rebuilds the same structure in-memory via +`JSONSerialization` and overwrites the on-disk file on every launch +(idempotent) so that upgrading the app bundle automatically updates the +`path` and `allowed_origins` entries. The `__HELPER_BINARY_PATH__` and +`__VELLUM_EXTENSION_ID__` placeholders in the template are for +humans reading the checked-in file — the actual install never +performs template substitution. + +## Testing + +```bash +cd clients/chrome-extension-native-host +bun test src/__tests__/protocol.test.ts +``` + +The current test suite covers the framing protocol (round-trips, +multi-frame buffers, partial frames, empty buffers). The CLI itself does +not yet have integration tests — those land alongside PR 13 when the +extension-side bootstrap flow is wired up. + +## Local manual smoke test + +Once the assistant is running and exposing `/v1/browser-extension-pair` +(PR 11), you can exercise the helper end-to-end without Chrome by piping +a framed request to it on stdin. 
Add the extension ID you want to test +to `ALLOWED_EXTENSION_IDS` in `src/index.ts` (or use the existing dev +placeholder), then: + +```bash +node --input-type=module -e " + import { encodeFrame } from './dist/protocol.js'; + process.stdout.write(encodeFrame({ type: 'request_token' })); +" | node dist/index.js "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/" +``` + +(The package is ESM — `"type": "module"` in `package.json` — so the +inline snippet uses `--input-type=module` and a static `import` statement rather +than `require()`, which would fail with `ERR_REQUIRE_ESM`.) + +The helper will write a single `token_response` frame to stdout and +exit `0`, or an `error` frame and exit `1`. + +To target a non-default assistant port (e.g. a named local instance): + +```bash +node --input-type=module -e " + import { encodeFrame } from './dist/protocol.js'; + process.stdout.write(encodeFrame({ type: 'request_token' })); +" | node dist/index.js \ + "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/" \ + --assistant-port 7822 +``` + +## Related PRs + +- **PR 11** — Assistant `/v1/browser-extension-pair` endpoint that mints + the capability token this helper requests. +- **PR 12** — macOS installer changes that drop the compiled binary and + the native messaging host manifest into Chrome's well-known + `NativeMessagingHosts` directory. +- **PR 13** — Chrome extension changes that call + `chrome.runtime.connectNative("com.vellum.daemon")`, send a + `request_token` frame, and persist the response in + `chrome.storage.local`.
diff --git a/clients/chrome-extension-native-host/bun.lock b/clients/chrome-extension-native-host/bun.lock new file mode 100644 index 00000000000..c45aaa9d381 --- /dev/null +++ b/clients/chrome-extension-native-host/bun.lock @@ -0,0 +1,20 @@ +{ + "lockfileVersion": 1, + "configVersion": 1, + "workspaces": { + "": { + "name": "@vellum/chrome-extension-native-host", + "devDependencies": { + "@types/node": "^20.0.0", + "typescript": "^5.5.0", + }, + }, + }, + "packages": { + "@types/node": ["@types/node@20.19.39", "", { "dependencies": { "undici-types": "~6.21.0" } }, "sha512-orrrD74MBUyK8jOAD/r0+lfa1I2MO6I+vAkmAWzMYbCcgrN4lCrmK52gRFQq/JRxfYPfonkr4b0jcY7Olqdqbw=="], + + "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], + + "undici-types": ["undici-types@6.21.0", "", {}, "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="], + } +} diff --git a/clients/chrome-extension-native-host/com.vellum.daemon.json.template b/clients/chrome-extension-native-host/com.vellum.daemon.json.template new file mode 100644 index 00000000000..3954da548c1 --- /dev/null +++ b/clients/chrome-extension-native-host/com.vellum.daemon.json.template @@ -0,0 +1,9 @@ +{ + "name": "com.vellum.daemon", + "description": "Vellum assistant native messaging host", + "path": "__HELPER_BINARY_PATH__", + "type": "stdio", + "allowed_origins": [ + "chrome-extension://__VELLUM_EXTENSION_ID__/" + ] +} diff --git a/clients/chrome-extension-native-host/package.json b/clients/chrome-extension-native-host/package.json new file mode 100644 index 00000000000..06de0b7cc85 --- /dev/null +++ b/clients/chrome-extension-native-host/package.json @@ -0,0 +1,19 @@ +{ + "name": "@vellum/chrome-extension-native-host", + "version": "0.1.0", + "private": true, + "license": "MIT", + "type": "module", + "bin": { + 
"vellum-chrome-native-host": "./dist/index.js" + }, + "scripts": { + "build": "tsc -p tsconfig.json", + "typecheck": "bunx tsc --noEmit", + "test": "bun test src/" + }, + "devDependencies": { + "@types/node": "^20.0.0", + "typescript": "^5.5.0" + } +} diff --git a/clients/chrome-extension-native-host/src/__tests__/index.test.ts b/clients/chrome-extension-native-host/src/__tests__/index.test.ts new file mode 100644 index 00000000000..4929078e55f --- /dev/null +++ b/clients/chrome-extension-native-host/src/__tests__/index.test.ts @@ -0,0 +1,320 @@ +/** + * Subprocess regression tests for the Chrome native messaging helper. + * + * These tests spawn the compiled helper binary and verify: + * + * 1. Unauthorized chrome-extension origins terminate with exit code 1 + * BEFORE any HTTP request is made to /v1/browser-extension-pair. + * (The helper installs its stdin listener only after the origin + * allowlist check, so unauthorized callers cannot inject frames.) + * + * 2. Authorized origins forward the pair endpoint's token, expiresAt, + * and guardianId fields verbatim into the native-messaging + * token_response frame. + * + * 3. A pair endpoint response missing guardianId causes the helper to + * exit non-zero and emit an error frame, preventing a malformed + * token from reaching the extension's bootstrap path. + * + * The suite skips gracefully when `dist/index.js` is missing so cold + * checkouts don't break CI. 
+ */ + +import { existsSync } from "node:fs"; +import { dirname, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { afterAll, beforeAll, describe, expect, test } from "bun:test"; + +import { decodeFrames, encodeFrame } from "../protocol.js"; + +// --------------------------------------------------------------------------- +// Paths & skip guard +// --------------------------------------------------------------------------- + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const HELPER_BINARY = resolve(__dirname, "../../dist/index.js"); +const HELPER_EXISTS = existsSync(HELPER_BINARY); + +const SKIP_REASON = + "clients/chrome-extension-native-host/dist/index.js is missing — run `bun run build` in clients/chrome-extension-native-host to enable these tests."; + +// The native helper hard-codes a placeholder allowlist with this single +// dev id. The companion route in +// `assistant/src/runtime/routes/browser-extension-pair-routes.ts` mirrors +// it; if either side ever diverges, both these tests and the e2e +// self-hosted test in `assistant/src/__tests__/` will fail loudly. +const ALLOWED_ORIGIN = "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/"; +const DISALLOWED_ORIGIN = + "chrome-extension://bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/"; + +// --------------------------------------------------------------------------- +// Mock pair-endpoint server +// --------------------------------------------------------------------------- + +interface MockPairServer { + server: ReturnType<typeof Bun.serve>; + port: number; + /** All requests received by the mock server, in order. */ + requests: Array<{ pathname: string; body: unknown }>; + /** Body the next pair request should return. */ + nextResponseBody: () => Record<string, unknown>; + stop: () => void; +} + +/** + * Boot a tiny `Bun.serve` listener on a free port that records every + * request and responds with whatever `nextResponseBody` produces.
The + * test can mutate `nextResponseBody` between scenarios to swap fixtures + * (e.g. drop `guardianId` from the response to exercise the + * malformed-frame rejection path). + */ +function startMockPairServer(): MockPairServer { + const state: MockPairServer = { + server: null as unknown as ReturnType<typeof Bun.serve>, + port: 0, + requests: [], + nextResponseBody: () => ({ + token: "tok-1", + expiresAt: "2026-12-31T00:00:00Z", + guardianId: "g-1", + }), + stop: () => { + /* replaced below */ + }, + }; + + const server = Bun.serve({ + port: 0, + hostname: "127.0.0.1", + async fetch(req) { + const url = new URL(req.url); + let body: unknown = null; + try { + body = await req.json(); + } catch { + body = null; + } + state.requests.push({ pathname: url.pathname, body }); + + if ( + url.pathname !== "/v1/browser-extension-pair" || + req.method !== "POST" + ) { + return new Response("not found", { status: 404 }); + } + + return Response.json(state.nextResponseBody()); + }, + }); + + state.server = server; + state.port = server.port as number; + state.stop = () => server.stop(true); + return state; +} + +// --------------------------------------------------------------------------- +// Subprocess helpers +// --------------------------------------------------------------------------- + +interface HelperRunResult { + frames: unknown[]; + stderr: string; + exitCode: number; +} + +/** + * Spawn the helper as a subprocess via `Bun.spawn`, write the framed + * `request_token` payload to its stdin, and collect stdout / stderr / + * exit code with a hard upper bound on wall-clock time. Uses + * `Bun.spawn` (instead of `node:child_process`) for `proc.exited` as a + * clean Promise that integrates with the bun:test runner.
+ */ +async function runHelper(options: { + extensionOrigin: string; + assistantPort: number; + stdinBytes: Buffer; + timeoutMs?: number; +}): Promise<HelperRunResult> { + const args = [ + "node", + HELPER_BINARY, + options.extensionOrigin, + "--assistant-port", + String(options.assistantPort), + ]; + + const proc = Bun.spawn(args, { + stdin: "pipe", + stdout: "pipe", + stderr: "pipe", + env: { ...process.env }, + }); + + proc.stdin.write(options.stdinBytes); + await proc.stdin.end(); + + let timedOut = false; + const timeoutMs = options.timeoutMs ?? 1000; + const timer = setTimeout(() => { + timedOut = true; + try { + proc.kill("SIGKILL"); + } catch { + /* already exited */ + } + }, timeoutMs); + + const exitCode = await proc.exited; + clearTimeout(timer); + + const stdoutBuffer = Buffer.from(await new Response(proc.stdout).arrayBuffer()); + const stderrText = await new Response(proc.stderr).text(); + + if (timedOut) { + throw new Error( + `helper binary did not exit within ${timeoutMs}ms — stderr: ${stderrText}`, + ); + } + + let frames: unknown[]; + try { + frames = decodeFrames(stdoutBuffer).frames; + } catch (err) { + const detail = err instanceof Error ? err.message : String(err); + throw new Error( + `failed to decode helper stdout frames: ${detail}; raw stderr: ${stderrText}`, + ); + } + + return { frames, stderr: stderrText, exitCode }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("native host — subprocess regression coverage", () => { + let pair: MockPairServer | null = null; + + beforeAll(() => { + if (!HELPER_EXISTS) return; + pair = startMockPairServer(); + // Sanity-check the bound port so a wrong-port misconfig doesn't + // silently masquerade as a network failure inside the helper.
+ if (!pair.port) { + throw new Error("mock pair server failed to bind"); + } + }); + + afterAll(() => { + if (pair) pair.stop(); + }); + + if (!HELPER_EXISTS) { + test.skip(`native helper binary not built — ${SKIP_REASON}`, () => { + /* intentionally empty */ + }); + return; + } + + test("unauthorized origin halts before contacting the pair endpoint", async () => { + const srv = pair!; + srv.requests.length = 0; + + const result = await runHelper({ + extensionOrigin: DISALLOWED_ORIGIN, + assistantPort: srv.port, + // Pre-write a request_token frame so that if the unauthorized + // branch ever falls through to the stdin listener, the helper + // would observe a frame and POST the pair endpoint. Both of + // those side effects are asserted below. + stdinBytes: encodeFrame({ type: "request_token" }), + timeoutMs: 1000, + }); + + // The helper must terminate with a non-zero exit code (we use the + // documented value of 1) within the 1s timeout. If `runHelper` + // throws on timeout, this assertion will not run. + expect(result.exitCode).toBe(1); + + // Exactly one error frame on stdout — never a token_response. + expect(result.frames).toHaveLength(1); + const frame = result.frames[0] as { type?: unknown; message?: unknown }; + expect(frame.type).toBe("error"); + expect(frame.message).toBe("unauthorized_origin"); + + // The critical invariant: the helper must NOT have POSTed anything + // to /v1/browser-extension-pair. If the unauthorized branch falls + // through and the stdin listener runs, the mock server's + // `requests` array will contain at least one entry. 
+ expect(srv.requests).toHaveLength(0); + }); + + test("authorized origin forwards guardianId in the token_response frame", async () => { + const srv = pair!; + srv.requests.length = 0; + srv.nextResponseBody = () => ({ + token: "tok-1", + expiresAt: "2026-12-31T00:00:00Z", + guardianId: "g-1", + }); + + const result = await runHelper({ + extensionOrigin: ALLOWED_ORIGIN, + assistantPort: srv.port, + stdinBytes: encodeFrame({ type: "request_token" }), + timeoutMs: 2000, + }); + + expect(result.exitCode, `helper stderr: ${result.stderr}`).toBe(0); + expect(result.frames).toHaveLength(1); + + const frame = result.frames[0] as { + type?: unknown; + token?: unknown; + expiresAt?: unknown; + guardianId?: unknown; + }; + expect(frame.type).toBe("token_response"); + expect(frame.token).toBe("tok-1"); + expect(frame.expiresAt).toBe("2026-12-31T00:00:00Z"); + expect(frame.guardianId).toBe("g-1"); + + // The helper should have made exactly one POST to the pair + // endpoint, carrying the extension origin we passed on argv. + expect(srv.requests).toHaveLength(1); + expect(srv.requests[0]!.pathname).toBe("/v1/browser-extension-pair"); + expect(srv.requests[0]!.body).toEqual({ extensionOrigin: ALLOWED_ORIGIN }); + }); + + test("missing guardianId in the pair response is rejected with an error frame", async () => { + const srv = pair!; + srv.requests.length = 0; + // Mock returns a body without `guardianId`. The helper's + // request-token validation should catch the missing field and + // surface an error frame instead of writing a malformed + // token_response. 
+ srv.nextResponseBody = () => ({ + token: "tok-1", + expiresAt: "2026-12-31T00:00:00Z", + }); + + const result = await runHelper({ + extensionOrigin: ALLOWED_ORIGIN, + assistantPort: srv.port, + stdinBytes: encodeFrame({ type: "request_token" }), + timeoutMs: 2000, + }); + + expect(result.exitCode).not.toBe(0); + expect(result.frames).toHaveLength(1); + const frame = result.frames[0] as { type?: unknown; message?: unknown }; + expect(frame.type).toBe("error"); + expect(typeof frame.message).toBe("string"); + expect(frame.message).toMatch(/guardianId/); + }); +}); diff --git a/clients/chrome-extension-native-host/src/__tests__/integration.test.ts b/clients/chrome-extension-native-host/src/__tests__/integration.test.ts new file mode 100644 index 00000000000..60d0608f282 --- /dev/null +++ b/clients/chrome-extension-native-host/src/__tests__/integration.test.ts @@ -0,0 +1,328 @@ +/** + * Subprocess integration tests for the Chrome native messaging helper. + * + * Spawns the compiled `dist/index.js` binary as a child process, wires a + * tiny local HTTP server onto `127.0.0.1:<port>` that impersonates the + * assistant's `/v1/browser-extension-pair` endpoint, pipes a framed + * `request_token` message to the helper's stdin, and asserts that the + * helper responds with a `token_response` frame on stdout. + * + * These tests exercise the end-to-end stdio framing contract that Chrome + * relies on when spawning the helper via `chrome.runtime.connectNative`. + * They also cover the `unauthorized_origin` rejection path. + * + * The tests skip gracefully if `dist/index.js` is missing — cold checkouts + * and CI jobs that haven't run `bun run build` yet shouldn't fail here. + * Run `bun run build` in this package before running these tests.
+ */ + +import { spawn, type ChildProcessWithoutNullStreams } from "node:child_process"; +import { existsSync } from "node:fs"; +import { createServer, type Server } from "node:http"; +import { AddressInfo } from "node:net"; +import { dirname, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { afterAll, beforeAll, describe, expect, test } from "bun:test"; + +import { decodeFrames, encodeFrame } from "../protocol.js"; + +// --------------------------------------------------------------------------- +// Paths & skip guard +// --------------------------------------------------------------------------- + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +/** + * Absolute path to the built helper entry point. The test suite skips if + * this file doesn't exist so the suite stays green on cold builds where + * `bun run build` hasn't been invoked in the native-host package yet. + */ +const HELPER_BINARY = resolve(__dirname, "../../dist/index.js"); + +const HELPER_EXISTS = existsSync(HELPER_BINARY); + +const ALLOWED_ORIGIN = "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/"; +const DISALLOWED_ORIGIN = + "chrome-extension://bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/"; + +/** + * Default skip message so every `test.skip` surface has the same actionable + * explanation when the helper binary hasn't been built. + */ +const SKIP_REASON = + "clients/chrome-extension-native-host/dist/index.js is missing — run `bun run build` in clients/chrome-extension-native-host to enable these tests."; + +// --------------------------------------------------------------------------- +// Mock pair-endpoint HTTP server +// --------------------------------------------------------------------------- + +interface MockPairServer { + server: Server; + port: number; + /** Requests received by the mock server, in order. 
*/ + requests: Array<{ path: string; body: unknown; host: string | null }>; + /** Token value returned by the next successful pair request. */ + nextToken: { token: string; expiresAt: string }; + /** If set, the mock returns this HTTP status instead of 200 on pair. */ + failWithStatus: number | null; +} + +async function startMockPairServer(): Promise<MockPairServer> { + const state: MockPairServer = { + server: null as unknown as Server, + port: 0, + requests: [], + nextToken: { + token: "fake-token-from-mock-pair-server", + expiresAt: new Date(Date.now() + 60_000).toISOString(), + }, + failWithStatus: null, + }; + + const server = createServer((req, res) => { + const chunks: Buffer[] = []; + req.on("data", (chunk: Buffer) => chunks.push(chunk)); + req.on("end", () => { + const raw = Buffer.concat(chunks).toString("utf8"); + let body: unknown = null; + try { + body = raw ? JSON.parse(raw) : null; + } catch { + body = raw; + } + state.requests.push({ + path: req.url ?? "", + body, + host: req.headers.host ??
null, + }); + + if (req.url !== "/v1/browser-extension-pair" || req.method !== "POST") { + res.statusCode = 404; + res.end("not found"); + return; + } + + if (state.failWithStatus !== null) { + res.statusCode = state.failWithStatus; + res.setHeader("content-type", "application/json"); + res.end(JSON.stringify({ error: "mock failure" })); + return; + } + + res.statusCode = 200; + res.setHeader("content-type", "application/json"); + res.end( + JSON.stringify({ + token: state.nextToken.token, + expiresAt: state.nextToken.expiresAt, + guardianId: "mock-guardian", + }), + ); + }); + }); + + await new Promise((resolvePromise, rejectPromise) => { + server.once("error", rejectPromise); + server.listen(0, "127.0.0.1", () => { + server.removeListener("error", rejectPromise); + resolvePromise(); + }); + }); + + const addr = server.address() as AddressInfo; + state.server = server; + state.port = addr.port; + return state; +} + +async function stopMockPairServer(mock: MockPairServer): Promise { + await new Promise((resolvePromise) => { + mock.server.close(() => resolvePromise()); + }); +} + +// --------------------------------------------------------------------------- +// Subprocess helpers +// --------------------------------------------------------------------------- + +interface HelperRunResult { + frames: unknown[]; + stderr: string; + exitCode: number | null; +} + +/** + * Spawn the helper binary with the given extension origin + assistant + * port, write the provided raw stdin bytes, then collect the decoded + * response frames, stderr output, and exit code. + * + * The helper is a short-lived one-shot process (Chrome re-spawns it on + * every `connectNative` call), so we drive it by writing stdin and then + * closing it, then waiting for the process to exit. 
+ */
+function runHelper(options: {
+  extensionOrigin: string | null;
+  assistantPort: number | null;
+  stdinBytes: Buffer | null;
+  timeoutMs?: number;
+}): Promise<HelperRunResult> {
+  const timeoutMs = options.timeoutMs ?? 5000;
+  const args: string[] = [HELPER_BINARY];
+  if (options.extensionOrigin) args.push(options.extensionOrigin);
+  if (options.assistantPort !== null) {
+    args.push("--assistant-port", String(options.assistantPort));
+  }
+
+  const child: ChildProcessWithoutNullStreams = spawn("node", args, {
+    stdio: ["pipe", "pipe", "pipe"],
+    env: { ...process.env },
+  });
+
+  const stdoutChunks: Buffer[] = [];
+  const stderrChunks: Buffer[] = [];
+  child.stdout.on("data", (chunk: Buffer) => stdoutChunks.push(chunk));
+  child.stderr.on("data", (chunk: Buffer) => stderrChunks.push(chunk));
+
+  // The helper can exit before it reads stdin (its unauthorized-origin path
+  // exits before any stdin listener is attached). Absorb the resulting EPIPE
+  // so it is not raised as an unhandled 'error' event in the test process;
+  // the exit code and frames asserted by the caller carry the real outcome.
+  child.stdin.on("error", () => {});
+  if (options.stdinBytes) {
+    child.stdin.write(options.stdinBytes);
+  }
+  child.stdin.end();
+
+  return new Promise<HelperRunResult>((resolvePromise, rejectPromise) => {
+    const timeout = setTimeout(() => {
+      child.kill("SIGKILL");
+      rejectPromise(
+        new Error(`helper binary timed out after ${timeoutMs}ms`),
+      );
+    }, timeoutMs);
+
+    child.on("error", (err) => {
+      clearTimeout(timeout);
+      rejectPromise(err);
+    });
+
+    // 'close' fires after the stdio streams have ended, so both chunk
+    // arrays are complete by the time we decode.
+    child.on("close", (code) => {
+      clearTimeout(timeout);
+      const stdout = Buffer.concat(stdoutChunks);
+      const stderr = Buffer.concat(stderrChunks).toString("utf8");
+      try {
+        const { frames } = decodeFrames(stdout);
+        resolvePromise({ frames, stderr, exitCode: code });
+      } catch (err) {
+        rejectPromise(err as Error);
+      }
+    });
+  });
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+describe("native host helper — subprocess integration", () => {
+  let mock: MockPairServer | null = null;
+
+  beforeAll(async () => {
+    if (!HELPER_EXISTS) return;
+    mock = await startMockPairServer();
+  });
+
+  afterAll(async () => {
+    if (mock) await stopMockPairServer(mock);
+  });
+
+  if (!HELPER_EXISTS) {
+    test.skip(`native helper binary not built — ${SKIP_REASON}`, () => {
+      /* intentionally empty — see skip reason */
+    });
+    return;
+  }
+
+  test("responds to request_token with a token_response frame", async () => {
+    // Narrow the type for TypeScript — we know mock is non-null here
+    // because the beforeAll returned early only when HELPER_EXISTS was
+    // false, which would have routed us into the skip branch above.
+ const pair = mock!; + pair.nextToken = { + token: "integration-test-token-value", + expiresAt: new Date(Date.now() + 5 * 60_000).toISOString(), + }; + + const result = await runHelper({ + extensionOrigin: ALLOWED_ORIGIN, + assistantPort: pair.port, + stdinBytes: encodeFrame({ type: "request_token" }), + }); + + expect(result.exitCode).toBe(0); + expect(result.frames).toHaveLength(1); + + const frame = result.frames[0] as { + type: string; + token?: string; + expiresAt?: string; + }; + expect(frame.type).toBe("token_response"); + expect(frame.token).toBe("integration-test-token-value"); + expect(typeof frame.expiresAt).toBe("string"); + expect(frame.expiresAt!.length).toBeGreaterThan(0); + + // The mock server should have observed exactly one pair request + // carrying the extension origin we passed on the command line. + expect(pair.requests.length).toBe(1); + expect(pair.requests[0]!.path).toBe("/v1/browser-extension-pair"); + expect(pair.requests[0]!.body).toEqual({ extensionOrigin: ALLOWED_ORIGIN }); + }); + + test("rejects disallowed extension origin with an error frame", async () => { + const pair = mock!; + // Reset the request log so we can assert the helper never contacted + // the pair endpoint in the unauthorized case. + pair.requests.length = 0; + + const result = await runHelper({ + extensionOrigin: DISALLOWED_ORIGIN, + assistantPort: pair.port, + stdinBytes: encodeFrame({ type: "request_token" }), + }); + + expect(result.exitCode).not.toBe(0); + expect(result.frames).toHaveLength(1); + const frame = result.frames[0] as { type: string; message?: string }; + expect(frame.type).toBe("error"); + expect(frame.message).toBe("unauthorized_origin"); + + // No pair request should have been sent — the helper rejects + // unknown extension origins before touching the network. 
+ expect(pair.requests.length).toBe(0); + }); + + test("surfaces an error frame when the pair endpoint fails", async () => { + const pair = mock!; + pair.requests.length = 0; + pair.failWithStatus = 500; + + try { + const result = await runHelper({ + extensionOrigin: ALLOWED_ORIGIN, + assistantPort: pair.port, + stdinBytes: encodeFrame({ type: "request_token" }), + }); + + expect(result.exitCode).not.toBe(0); + expect(result.frames).toHaveLength(1); + const frame = result.frames[0] as { type: string; message?: string }; + expect(frame.type).toBe("error"); + // The helper wraps HTTP errors in a descriptive message; just + // assert it mentions the failure rather than pinning the exact + // phrasing, which is an implementation detail. + expect(typeof frame.message).toBe("string"); + expect(frame.message).toMatch(/pair/i); + } finally { + pair.failWithStatus = null; + } + }); +}); diff --git a/clients/chrome-extension-native-host/src/__tests__/protocol.test.ts b/clients/chrome-extension-native-host/src/__tests__/protocol.test.ts new file mode 100644 index 00000000000..0c1fb08ecbe --- /dev/null +++ b/clients/chrome-extension-native-host/src/__tests__/protocol.test.ts @@ -0,0 +1,126 @@ +/** + * Tests for the Chrome native messaging stdio framing in protocol.ts. + * + * Run with `bun test src/__tests__/protocol.test.ts` from the package root. + */ + +import { describe, expect, test } from "bun:test"; + +import { decodeFrames, encodeFrame, FrameDecodeError } from "../protocol.js"; + +describe("encodeFrame / decodeFrames", () => { + test("round-trips a simple object through encode/decode", () => { + const payload = { type: "request_token", origin: "chrome-extension://abc/" }; + const frame = encodeFrame(payload); + + // Frame layout: 4-byte LE length prefix followed by JSON body. 
+ expect(frame.length).toBeGreaterThan(4); + const expectedLen = Buffer.from(JSON.stringify(payload), "utf8").length; + expect(frame.readUInt32LE(0)).toBe(expectedLen); + + const { frames, remainder } = decodeFrames(frame); + expect(frames).toHaveLength(1); + expect(frames[0]).toEqual(payload); + expect(remainder.length).toBe(0); + }); + + test("decodes multiple frames in one buffer", () => { + const a = { type: "first", n: 1 }; + const b = { type: "second", n: 2 }; + const c = { type: "third", nested: { ok: true } }; + const combined = Buffer.concat([encodeFrame(a), encodeFrame(b), encodeFrame(c)]); + + const { frames, remainder } = decodeFrames(combined); + expect(frames).toHaveLength(3); + expect(frames[0]).toEqual(a); + expect(frames[1]).toEqual(b); + expect(frames[2]).toEqual(c); + expect(remainder.length).toBe(0); + }); + + test("leaves a partial frame in the remainder", () => { + const payload = { type: "request_token" }; + const frame = encodeFrame(payload); + // Slice off the last 3 bytes of the JSON body so the frame is incomplete. + const truncated = frame.subarray(0, frame.length - 3); + + const { frames, remainder } = decodeFrames(truncated); + expect(frames).toHaveLength(0); + // Remainder should equal the truncated input verbatim so the caller can + // append the next chunk and try again. + expect(remainder.equals(truncated)).toBe(true); + }); + + test("leaves a partial length prefix in the remainder", () => { + // Less than 4 bytes — not even enough for the length field. 
+ const partial = Buffer.from([0x01, 0x02]); + const { frames, remainder } = decodeFrames(partial); + expect(frames).toHaveLength(0); + expect(remainder.equals(partial)).toBe(true); + }); + + test("decodes a complete frame followed by a partial frame", () => { + const complete = { type: "complete" }; + const partialPayload = { type: "partial" }; + const combined = Buffer.concat([ + encodeFrame(complete), + encodeFrame(partialPayload).subarray(0, 5), + ]); + + const { frames, remainder } = decodeFrames(combined); + expect(frames).toHaveLength(1); + expect(frames[0]).toEqual(complete); + expect(remainder.length).toBe(5); + }); + + test("handles an empty buffer", () => { + const { frames, remainder } = decodeFrames(Buffer.alloc(0)); + expect(frames).toHaveLength(0); + expect(remainder.length).toBe(0); + }); + + test("throws FrameDecodeError when a complete frame body is invalid JSON", () => { + // Hand-craft a frame: 4-byte LE length prefix + a body that is not + // valid JSON. The decoder should reach the JSON.parse step (because + // the buffer has a full frame's worth of bytes) and throw, rather + // than crashing the host with an uncaught SyntaxError. 
+ const body = Buffer.from("not-json{", "utf8"); + const len = Buffer.alloc(4); + len.writeUInt32LE(body.length, 0); + const malformed = Buffer.concat([len, body]); + + expect(() => decodeFrames(malformed)).toThrow(FrameDecodeError); + expect(() => decodeFrames(malformed)).toThrow(/malformed_frame_json/); + }); + + test("FrameDecodeError preserves the underlying SyntaxError as cause", () => { + const body = Buffer.from("{not-valid", "utf8"); + const len = Buffer.alloc(4); + len.writeUInt32LE(body.length, 0); + const malformed = Buffer.concat([len, body]); + + let caught: unknown; + try { + decodeFrames(malformed); + } catch (err) { + caught = err; + } + expect(caught).toBeInstanceOf(FrameDecodeError); + expect((caught as FrameDecodeError).cause).toBeInstanceOf(SyntaxError); + }); + + test("a malformed frame after a valid one still throws (does not silently drop the valid one)", () => { + // Buffer layout: [valid frame][malformed frame]. The decoder iterates + // in order and throws on the second frame. The current contract is + // "fail loud on the first malformed frame" — we don't try to return + // any frames decoded before the failure, since the caller should + // surface a protocol_error and exit anyway. + const valid = encodeFrame({ type: "request_token" }); + const badBody = Buffer.from("definitely not json", "utf8"); + const badLen = Buffer.alloc(4); + badLen.writeUInt32LE(badBody.length, 0); + const combined = Buffer.concat([valid, badLen, badBody]); + + expect(() => decodeFrames(combined)).toThrow(FrameDecodeError); + }); +}); diff --git a/clients/chrome-extension-native-host/src/index.ts b/clients/chrome-extension-native-host/src/index.ts new file mode 100644 index 00000000000..390c227cdef --- /dev/null +++ b/clients/chrome-extension-native-host/src/index.ts @@ -0,0 +1,388 @@ +#!/usr/bin/env node +/** + * Vellum chrome-extension native messaging helper. 
+ * + * This binary is spawned by Chrome when the Vellum browser extension calls + * `chrome.runtime.connectNative("com.vellum.daemon")`. It speaks the Chrome + * native messaging stdio protocol (4-byte little-endian length prefix + + * UTF-8 JSON) on stdin/stdout. + * + * Responsibilities: + * + * 1. Verify that the calling extension's origin (passed by Chrome as the + * first command-line argument, e.g. `chrome-extension://<id>/`) is on a + * hard-coded allowlist of known Vellum extension IDs. + * 2. Listen on stdin for `{ type: "request_token" }` frames. + * 3. POST the calling extension's origin to the running assistant's + * `/v1/browser-extension-pair` endpoint (port resolved from + * `--assistant-port`, then `~/.vellum/runtime-port`, then defaulting to + * 7821). + * 4. Echo the assistant's response back to Chrome as a + * `{ type: "token_response", token, expiresAt, guardianId }` frame. + * 5. On any unrecoverable error, write a `{ type: "error", message }` frame + * and exit with a non-zero status. + * + * The helper deliberately does NOT persist tokens — the extension is + * responsible for storing the returned token in `chrome.storage.local`. + * + * The pairing flow as a whole consists of: (a) this helper, (b) the + * assistant-side `/v1/browser-extension-pair` endpoint that mints the + * capability token, and (c) the macOS installer wiring that drops the + * compiled binary alongside the native-messaging host manifest Chrome + * reads to resolve `com.vellum.daemon`. + */ + +import { readFileSync } from "node:fs"; +import { homedir } from "node:os"; +import { join } from "node:path"; + +import { decodeFrames, encodeFrame, FrameDecodeError } from "./protocol.js"; + +/** + * Allowlist of Chrome extension IDs that are permitted to spawn this helper. + * + * Chrome passes the calling extension's origin as the first positional + * argument, e.g. `chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/`.
+ * Anything not on this list is rejected before any further processing.
+ */
+const ALLOWED_EXTENSION_IDS: ReadonlySet<string> = new Set<string>([
+  // Dev placeholder — replaced when the unpacked extension is loaded locally.
+  // TODO: production id before release
+  "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+]);
+
+const DEFAULT_ASSISTANT_PORT = 7821;
+const RUNTIME_PORT_FILE = join(homedir(), ".vellum", "runtime-port");
+
+interface TokenResponse {
+  token: string;
+  expiresAt: string;
+  guardianId: string;
+}
+
+/**
+ * Strictly parse a decimal TCP port in the range 1–65535.
+ *
+ * Shared by the CLI flag and the lockfile so both apply the same rule.
+ * The input must consist of digits only: a bare `Number.parseInt` would
+ * accept `"7821abc"` or `"80.5"` and silently use the numeric prefix.
+ *
+ * @returns the port, or `null` if `raw` is not a valid port number.
+ */
+function parsePort(raw: string): number | null {
+  if (!/^\d+$/.test(raw)) return null;
+  const parsed = Number.parseInt(raw, 10);
+  return parsed > 0 && parsed < 65536 ? parsed : null;
+}
+
+/**
+ * Parse a `--assistant-port <port>` (or `--assistant-port=<port>`)
+ * argument out of `process.argv`. Returns the parsed port if present and
+ * valid, otherwise `null`.
+ *
+ * Only the first occurrence of the flag is considered; if its value is
+ * missing or invalid the result is `null` even if a later occurrence
+ * would have been valid.
+ *
+ * This is intentionally a tiny hand-rolled parser rather than a full CLI
+ * library: the helper is invoked by Chrome's native messaging runtime which
+ * has a fixed argv shape, and pulling in a CLI dependency would bloat the
+ * audited surface for no real gain.
+ */
+function parseAssistantPortArg(argv: readonly string[]): number | null {
+  for (let i = 0; i < argv.length; i++) {
+    const arg = argv[i]!;
+    let raw: string | undefined;
+    if (arg === "--assistant-port") {
+      raw = argv[i + 1];
+    } else if (arg.startsWith("--assistant-port=")) {
+      raw = arg.slice("--assistant-port=".length);
+    } else {
+      continue;
+    }
+    return raw === undefined ? null : parsePort(raw);
+  }
+  return null;
+}
+
+/**
+ * Resolve the assistant's HTTP port. Resolution order:
+ *
+ * 1. `--assistant-port <port>` CLI flag (highest precedence). This exists
+ *    so a wrapper script registered in Chrome's NativeMessagingHosts
+ *    manifest can pin the helper to a known port without relying on a
+ *    lockfile.
+ * 2. `~/.vellum/runtime-port` lockfile (a single integer). This file is
+ *    not yet written by the assistant — see the TODO in this package's
+ *    README. Once it is, no manifest changes are needed for default
+ *    installs.
+ * 3. The well-known default port `7821`.
+ *
+ * Any read or parse failure on the lockfile silently falls through to the
+ * default rather than crashing — the assistant is the ultimate source of
+ * truth and the subsequent HTTP call will surface a clear connection error.
+ */
+function resolveAssistantPort(argv: readonly string[]): number {
+  const fromArg = parseAssistantPortArg(argv);
+  if (fromArg !== null) return fromArg;
+  try {
+    const fromFile = parsePort(readFileSync(RUNTIME_PORT_FILE, "utf8").trim());
+    if (fromFile !== null) return fromFile;
+  } catch {
+    // Fall through to the default. This is expected on first launch and in
+    // dev environments where the port file hasn't been written yet.
+  }
+  return DEFAULT_ASSISTANT_PORT;
+}
+
+/**
+ * Extract the bare extension id from a `chrome-extension://<id>/` origin.
+ * Returns `null` if the input doesn't match the expected shape.
+ */
+function parseExtensionId(origin: string | undefined): string | null {
+  if (!origin) return null;
+  // An id is exactly 32 characters from a–p; the trailing slash is optional.
+  const match = origin.match(/^chrome-extension:\/\/([a-p]{32})\/?$/);
+  return match ? match[1]! : null;
+}
+
+/**
+ * Writes a native-messaging frame to stdout and terminates the process
+ * synchronously. The exit code is the authoritative signal to Chrome;
+ * the frame body is best-effort. Use this for error paths (unauthorized
+ * origin, malformed requests) where Chrome only needs to observe a
+ * non-zero exit and any frame-body truncation is acceptable.
+ *
+ * Typed `never` because `process.exit()` never returns, which lets
+ * callers treat this as an unconditional terminator with no event-loop
+ * tick between the write and the exit.
+ */
+function writeErrorFrameAndExit(payload: unknown, exitCode: number): never {
+  try {
+    process.stdout.write(encodeFrame(payload));
+  } catch {
+    // Ignore — exit code is the authoritative signal here.
+  }
+  process.exit(exitCode);
+}
+
+/**
+ * Writes a native-messaging frame to stdout and terminates the process
+ * only after libuv has flushed the write to the pipe. Use this for
+ * success paths (e.g., `token_response`) where Chrome needs the full
+ * frame body to drive the extension's pairing flow.
+ *
+ * The callback form of `process.stdout.write()` fires once the buffer
+ * has been handed off to the kernel, so awaiting the returned Promise
+ * guarantees the frame made it across the pipe before the process
+ * exits. This matters on pipe-backed stdout (Chrome native messaging)
+ * where a sync `process.exit()` can terminate before libuv finishes
+ * flushing a large-enough frame — most visibly on Windows.
+ *
+ * The Promise never resolves: the callback always ends in
+ * `process.exit(exitCode)`, so from the caller's perspective an `await`
+ * on this function is a terminator. A defensive 5-second safety timeout
+ * rejects if the callback somehow never fires; the timer is `.unref()`ed
+ * so it cannot keep the event loop alive on its own.
+ */
+function writeFrameAndExitAsync(
+  payload: unknown,
+  exitCode: number,
+): Promise<never> {
+  return new Promise<never>((_, reject) => {
+    process.stdout.write(encodeFrame(payload), () => {
+      // Best-effort: exit with the intended code even if the callback
+      // reports a write error. Chrome will observe a disconnect on the
+      // pipe and report the error through its native-messaging UI.
+      process.exit(exitCode);
+    });
+    const safety = setTimeout(() => {
+      reject(
+        new Error(
+          "writeFrameAndExitAsync timed out waiting for stdout flush",
+        ),
+      );
+    }, 5000);
+    safety.unref?.();
+  });
+}
+
+/**
+ * Emit an `error` frame and exit with a non-zero status.
+ * Also logs the
+ * underlying message to stderr so an operator running the binary by hand
+ * (or a Chrome extension developer inspecting the host's stderr stream)
+ * can see what went wrong.
+ *
+ * Uses `writeErrorFrameAndExit` so the error frame is written to stdout
+ * before the process terminates. `writeErrorFrameAndExit` handles its
+ * own write failures internally, so no additional try/catch is needed
+ * here.
+ */
+function fail(message: string, code = 1): never {
+  process.stderr.write(`vellum-chrome-native-host: ${message}\n`);
+  writeErrorFrameAndExit({ type: "error", message }, code);
+}
+
+/**
+ * POST the extension origin to the assistant's pair endpoint and return the
+ * issued capability token. Surfaces a uniform error message on failure so
+ * the caller can wrap it in a native-messaging error frame.
+ *
+ * The request is bounded by a 10-second deadline: this helper is a
+ * one-shot process, so an assistant that accepts the connection but never
+ * answers would otherwise leave it waiting indefinitely. A timeout is
+ * reported through the same "failed to reach assistant" error as any
+ * other transport failure.
+ *
+ * Note: error messages here are user-visible (they get propagated to Chrome
+ * as `{ type: "error", message }` frames and surfaced in the extension UI),
+ * so per AGENTS.md they refer to the local process as the "assistant" rather
+ * than the internal "daemon" name.
+ *
+ * @throws Error if the assistant is unreachable or times out, answers with
+ *   a non-2xx status, or returns a body that is not JSON or lacks a string
+ *   `token`, `expiresAt`, or non-empty `guardianId`.
+ */
+async function requestToken(
+  extensionOrigin: string,
+  argv: readonly string[],
+): Promise<TokenResponse> {
+  const pairRequestTimeoutMs = 10_000;
+  const port = resolveAssistantPort(argv);
+  const url = `http://127.0.0.1:${port}/v1/browser-extension-pair`;
+
+  let response: Response;
+  try {
+    response = await fetch(url, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({ extensionOrigin }),
+      signal: AbortSignal.timeout(pairRequestTimeoutMs),
+    });
+  } catch (err) {
+    const detail = err instanceof Error ? err.message : String(err);
+    throw new Error(`failed to reach assistant at ${url}: ${detail}`);
+  }
+
+  if (!response.ok) {
+    throw new Error(
+      `assistant pair request failed with HTTP ${response.status}`,
+    );
+  }
+
+  let body: unknown;
+  try {
+    body = await response.json();
+  } catch (err) {
+    const detail = err instanceof Error ? err.message : String(err);
+    throw new Error(`assistant pair response was not valid JSON: ${detail}`);
+  }
+
+  if (
+    !body ||
+    typeof body !== "object" ||
+    typeof (body as { token?: unknown }).token !== "string" ||
+    typeof (body as { expiresAt?: unknown }).expiresAt !== "string"
+  ) {
+    throw new Error("assistant pair response missing token / expiresAt");
+  }
+
+  const { token, expiresAt, guardianId } = body as TokenResponse;
+  if (typeof guardianId !== "string" || guardianId.length === 0) {
+    throw new Error("pair endpoint response missing guardianId");
+  }
+  return { token, expiresAt, guardianId };
+}
+
+async function main(): Promise<void> {
+  // Chrome passes the calling extension's origin (e.g.
+  // `chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/`) as the first
+  // positional argument when it spawns the native messaging host.
+  //
+  // Where that lands in `process.argv` depends on how the manifest's `path`
+  // is set up: if it points directly at a compiled binary, the origin shows
+  // up at `argv[1]`; if it points at a wrapper shell script that re-execs
+  // `node dist/index.js "$@"`, the origin lands at `argv[2]` (because Node
+  // takes argv[0] = node and argv[1] = the script path). To stay robust
+  // across both deployment shapes we scan all post-argv[0] arguments for
+  // the first one that looks like a `chrome-extension://` URL.
+  const extensionOrigin = process.argv
+    .slice(1)
+    .find((arg) => arg.startsWith("chrome-extension://"));
+  const extensionId = parseExtensionId(extensionOrigin);
+
+  if (!extensionId || !ALLOWED_EXTENSION_IDS.has(extensionId)) {
+    process.stderr.write(
+      `vellum-chrome-native-host: unauthorized_origin (got ${extensionOrigin ?? ""})\n`,
+    );
+    writeErrorFrameAndExit(
+      { type: "error", message: "unauthorized_origin" },
+      1,
+    );
+    // Defense-in-depth: even though writeErrorFrameAndExit calls
+    // process.exit synchronously and is typed `never`, an explicit
+    // `return` here guarantees we never fall through to the stdin
+    // listener setup below if a future refactor accidentally makes the
+    // helper async.
+    return;
+  }
+
+  // Reading stdin in 4-byte-framed chunks. Chrome may deliver a single
+  // request across multiple `data` events, so we accumulate into a buffer
+  // and let `decodeFrames` peel off whole messages as they arrive.
+  let pending: Buffer = Buffer.alloc(0);
+  let handling = false;
+
+  process.stdin.on("data", async (chunk: Buffer) => {
+    // The entire handler body is wrapped in a try/catch so that any
+    // unexpected exception (most notably `FrameDecodeError` from a
+    // malformed JSON body, but also any synchronous error before the
+    // request reaches `requestToken`) is translated into a protocol-level
+    // `error` frame instead of bubbling up to Node's unhandled-rejection
+    // path and silently exit-1'ing the process.
+    try {
+      pending = Buffer.concat([pending, chunk]);
+      const { frames, remainder } = decodeFrames(pending);
+      pending = remainder;
+
+      for (const frame of frames) {
+        if (handling) {
+          // We only support a single in-flight request per spawn — Chrome
+          // re-spawns the helper on every `connectNative` call. Anything
+          // beyond the first frame is treated as a protocol error.
+          fail("unexpected_additional_frame");
+        }
+        handling = true;
+
+        if (
+          !frame ||
+          typeof frame !== "object" ||
+          (frame as { type?: unknown }).type !== "request_token"
+        ) {
+          fail("unsupported_frame_type");
+        }
+
+        try {
+          const { token, expiresAt, guardianId } = await requestToken(
+            extensionOrigin!,
+            process.argv,
+          );
+          await writeFrameAndExitAsync(
+            { type: "token_response", token, expiresAt, guardianId },
+            0,
+          );
+        } catch (err) {
+          const message = err instanceof Error ? err.message : String(err);
+          fail(message);
+        }
+      }
+    } catch (err) {
+      // `FrameDecodeError` is the most likely culprit here (malformed JSON
+      // body in a stdin frame), but we deliberately funnel any synchronous
+      // exception thrown out of `decodeFrames` or the dispatch loop above
+      // through this single catch so the helper always gets a chance to
+      // emit a structured `error` frame instead of dying with an
+      // unhandled exception.
+      const detail =
+        err instanceof FrameDecodeError
+          ? err.message
+          : err instanceof Error
+            ? err.message
+            : String(err);
+      fail(`protocol_error: ${detail}`);
+    }
+  });
+
+  process.stdin.on("end", () => {
+    if (!handling) {
+      // Chrome closed the pipe without sending a request — treat as a
+      // clean no-op exit so we don't pollute logs with bogus errors.
+      process.exit(0);
+    }
+  });
+
+  process.stdin.on("error", (err) => {
+    fail(`stdin_error: ${err.message}`);
+  });
+}
+
+main().catch((err) => {
+  const message = err instanceof Error ? err.message : String(err);
+  fail(message);
+});
diff --git a/clients/chrome-extension-native-host/src/protocol.ts b/clients/chrome-extension-native-host/src/protocol.ts
new file mode 100644
index 00000000000..c501ea67b02
--- /dev/null
+++ b/clients/chrome-extension-native-host/src/protocol.ts
@@ -0,0 +1,73 @@
+/**
+ * Chrome Native Messaging stdio framing.
+ *
+ * Each message exchanged between Chrome and the native host is prefixed with a
+ * 32-bit unsigned little-endian length, followed by a UTF-8 JSON payload of
+ * exactly that length.
+ *
+ * See: https://developer.chrome.com/docs/extensions/develop/concepts/native-messaging#native-messaging-host-protocol
+ */
+
+/**
+ * Thrown by `decodeFrames` when a complete frame body is not valid JSON.
+ *
+ * The caller is expected to translate this into a protocol-level error frame
+ * (e.g.
+ * via the helper's `fail()` path) rather than letting it propagate as an
+ * uncaught exception, which would crash the host without surfacing a
+ * structured error response to Chrome.
+ */
+export class FrameDecodeError extends Error {
+  constructor(message: string, options?: { cause?: unknown }) {
+    super(message, options);
+    this.name = "FrameDecodeError";
+  }
+}
+
+/**
+ * Encode an arbitrary JSON-serializable payload as a single native-messaging
+ * frame: 4-byte little-endian length prefix followed by the UTF-8 JSON body.
+ *
+ * The length prefix counts encoded bytes, not string characters, so
+ * multi-byte UTF-8 payloads are framed correctly.
+ *
+ * @throws TypeError if `payload` has no JSON representation (`undefined`,
+ *   a function, or a symbol), or if `JSON.stringify` itself throws.
+ */
+export function encodeFrame(payload: unknown): Buffer {
+  // JSON.stringify returns `undefined` (typed as string) for values with no
+  // JSON form; catch that here rather than letting Buffer.from fail with an
+  // opaque argument-type error.
+  const text = JSON.stringify(payload) as string | undefined;
+  if (text === undefined) {
+    throw new TypeError("encodeFrame: payload is not JSON-serializable");
+  }
+  const json = Buffer.from(text, "utf8");
+  const len = Buffer.alloc(4);
+  len.writeUInt32LE(json.length, 0);
+  return Buffer.concat([len, json]);
+}
+
+/**
+ * Decode as many complete frames as possible from a buffer accumulated from
+ * stdin. Returns the parsed frames plus any unconsumed bytes (a partial frame
+ * that should be carried into the next read).
+ *
+ * The decoder is intentionally tolerant of partial reads — Chrome may deliver
+ * a single message across multiple `data` events, and multiple messages may
+ * arrive coalesced in one event.
+ *
+ * If a complete frame body fails to parse as JSON, this function throws a
+ * `FrameDecodeError`. Callers should catch it and translate it into a
+ * protocol-level error frame rather than letting it crash the host. Frames
+ * decoded before the malformed one are not returned.
+ */
+export function decodeFrames(buf: Buffer): {
+  frames: unknown[];
+  remainder: Buffer;
+} {
+  const frames: unknown[] = [];
+  let offset = 0;
+  // Need at least the 4-byte length prefix to make progress.
+  while (buf.length - offset >= 4) {
+    const len = buf.readUInt32LE(offset);
+    // Body not fully buffered yet: leave prefix + partial body for next time.
+    if (buf.length - offset - 4 < len) break;
+    const body = buf.subarray(offset + 4, offset + 4 + len);
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(body.toString("utf8"));
+    } catch (err) {
+      const detail = err instanceof Error ? err.message : String(err);
+      throw new FrameDecodeError(`malformed_frame_json: ${detail}`, {
+        cause: err,
+      });
+    }
+    frames.push(parsed);
+    offset += 4 + len;
+  }
+  return { frames, remainder: buf.subarray(offset) };
+}
diff --git a/clients/chrome-extension-native-host/tsconfig.json b/clients/chrome-extension-native-host/tsconfig.json
new file mode 100644
index 00000000000..737c9dd792a
--- /dev/null
+++ b/clients/chrome-extension-native-host/tsconfig.json
@@ -0,0 +1,20 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "NodeNext",
+    "moduleResolution": "NodeNext",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "resolveJsonModule": true,
+    "declaration": true,
+    "declarationMap": true,
+    "sourceMap": true,
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "types": ["node"]
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist", "src/**/__tests__/**"]
+}
diff --git a/clients/chrome-extension/background/__tests__/cdp-proxy.test.ts b/clients/chrome-extension/background/__tests__/cdp-proxy.test.ts
new file mode 100644
index 00000000000..568212c978f
--- /dev/null
+++ b/clients/chrome-extension/background/__tests__/cdp-proxy.test.ts
@@ -0,0 +1,266 @@
+/**
+ * Tests for the standalone CDP JSON-RPC proxy.
+ *
+ * Drives `createCdpProxy` against an injected mock `ChromeDebuggerApi` so we
+ * can exercise both happy and error paths without touching any real
+ * `chrome.debugger` surface. The mock records every call and exposes a
+ * mutable `runtime.lastError` field — tests toggle that field between
+ * callback invocations to simulate the way Chrome surfaces async failures.
+ */ + +import { describe, test, expect } from 'bun:test'; + +import { + createCdpProxy, + type ChromeDebuggerApi, + type CdpDebuggee, + type CdpEventFrame, + type CdpRequestFrame, + type CdpTarget, + type DebuggerSession, +} from '../cdp-proxy.js'; + +// ── Mock fixture ──────────────────────────────────────────────────── + +interface MockChromeDebuggerApi extends ChromeDebuggerApi { + attachCalls: Array<{ target: DebuggerSession; requiredVersion: string }>; + detachCalls: Array<{ target: DebuggerSession }>; + sendCommandCalls: Array<{ + target: DebuggerSession; + method: string; + params?: Record; + }>; + /** + * Configurable result that the next `sendCommand` callback will be invoked + * with. Defaults to `undefined`. + */ + nextSendCommandResult?: unknown; + /** Listeners registered through `onEvent.addListener`. */ + eventListeners: Set< + (source: DebuggerSession, method: string, params?: unknown) => void + >; + /** Listeners removed through `onEvent.removeListener`. */ + removedEventListeners: Array< + (source: DebuggerSession, method: string, params?: unknown) => void + >; + /** Listeners registered through `onDetach.addListener`. */ + detachListeners: Set<(source: CdpDebuggee, reason: string) => void>; + /** Synthetically dispatch an event to all currently-registered listeners. 
*/ + fireEvent(source: DebuggerSession, method: string, params?: unknown): void; +} + +function createMockApi(): MockChromeDebuggerApi { + const attachCalls: MockChromeDebuggerApi['attachCalls'] = []; + const detachCalls: MockChromeDebuggerApi['detachCalls'] = []; + const sendCommandCalls: MockChromeDebuggerApi['sendCommandCalls'] = []; + const eventListeners: MockChromeDebuggerApi['eventListeners'] = new Set(); + const removedEventListeners: MockChromeDebuggerApi['removedEventListeners'] = []; + const detachListeners: MockChromeDebuggerApi['detachListeners'] = new Set(); + + const api: MockChromeDebuggerApi = { + attachCalls, + detachCalls, + sendCommandCalls, + eventListeners, + removedEventListeners, + detachListeners, + nextSendCommandResult: undefined, + runtime: { lastError: undefined }, + attach(target, requiredVersion, callback) { + attachCalls.push({ target, requiredVersion }); + callback?.(); + }, + detach(target, callback) { + detachCalls.push({ target }); + callback?.(); + }, + sendCommand(target, method, params, callback) { + sendCommandCalls.push({ target, method, params }); + callback?.(api.nextSendCommandResult); + }, + onEvent: { + addListener(callback) { + eventListeners.add(callback); + }, + removeListener(callback) { + eventListeners.delete(callback); + removedEventListeners.push(callback); + }, + }, + onDetach: { + addListener(callback) { + detachListeners.add(callback); + }, + removeListener(callback) { + detachListeners.delete(callback); + }, + }, + fireEvent(source, method, params) { + for (const listener of eventListeners) { + listener(source, method, params); + } + }, + }; + return api; +} + +// ── Tests ─────────────────────────────────────────────────────────── + +describe('createCdpProxy', () => { + describe('attach', () => { + test('resolves on success', async () => { + const api = createMockApi(); + const proxy = createCdpProxy(api); + + await proxy.attach({ tabId: 1 }, '1.3'); + + expect(api.attachCalls.length).toBe(1); + 
expect(api.attachCalls[0].target).toEqual({ tabId: 1 }); + expect(api.attachCalls[0].requiredVersion).toBe('1.3'); + }); + + test('rejects on runtime.lastError', async () => { + const api = createMockApi(); + // Override the default attach so we can set lastError synchronously + // before invoking the callback — matches how Chrome's bindings flag + // failures from inside the callback frame. + api.attach = (_target, _requiredVersion, callback) => { + api.runtime.lastError = { message: 'no such tab' }; + callback?.(); + api.runtime.lastError = undefined; + }; + const proxy = createCdpProxy(api); + + let rejectionMessage: string | null = null; + try { + await proxy.attach({ tabId: 99 }, '1.3'); + } catch (err) { + rejectionMessage = err instanceof Error ? err.message : String(err); + } + expect(rejectionMessage).toBe('no such tab'); + }); + }); + + describe('send', () => { + test('resolves with result frame on success', async () => { + const api = createMockApi(); + api.nextSendCommandResult = { data: 'ok' }; + const proxy = createCdpProxy(api); + + const frame: CdpRequestFrame = { + id: 7, + method: 'Browser.getVersion', + params: { foo: 'bar' }, + }; + const result = await proxy.send({ tabId: 1 }, frame); + + expect(result).toEqual({ id: 7, result: { data: 'ok' } }); + expect(api.sendCommandCalls.length).toBe(1); + expect(api.sendCommandCalls[0].method).toBe('Browser.getVersion'); + expect(api.sendCommandCalls[0].params).toEqual({ foo: 'bar' }); + }); + + test('resolves with error frame on runtime.lastError', async () => { + const api = createMockApi(); + api.sendCommand = (_target, _method, _params, callback) => { + api.runtime.lastError = { message: 'cannot find context' }; + callback?.(undefined); + api.runtime.lastError = undefined; + }; + const proxy = createCdpProxy(api); + + const frame: CdpRequestFrame = { id: 11, method: 'Page.reload' }; + const result = await proxy.send({ tabId: 1 }, frame); + + expect(result).toEqual({ + id: 11, + error: { code: -32000, 
message: 'cannot find context' }, + }); + }); + + // Regression test for Codex P2: when targetToDebuggee throws synchronously + // inside the Promise executor (because the target has neither tabId nor + // targetId) the proxy must convert it into a -32602 error frame instead + // of letting the throw escape as a promise rejection. send()'s contract is + // to ALWAYS resolve with a CdpResultFrame. + test('resolves with error frame when targetToDebuggee throws synchronously', async () => { + const api = createMockApi(); + const proxy = createCdpProxy(api); + + const frame: CdpRequestFrame = { id: 13, method: 'Page.reload' }; + // Cast to CdpTarget so TypeScript accepts the deliberately empty shape. + const badTarget: CdpTarget = {}; + const result = await proxy.send(badTarget, frame); + + expect(result).toEqual({ + id: 13, + error: { + code: -32602, + message: 'CdpTarget must have either tabId or targetId', + }, + }); + // sendCommand must NOT have been invoked — we never made it past the + // pre-flight target resolution. 
+ expect(api.sendCommandCalls.length).toBe(0); + }); + }); + + describe('onEvent', () => { + test('delivers events to all registered handlers', () => { + const api = createMockApi(); + const proxy = createCdpProxy(api); + + const received1: CdpEventFrame[] = []; + const received2: CdpEventFrame[] = []; + proxy.onEvent((event) => received1.push(event)); + proxy.onEvent((event) => received2.push(event)); + + api.fireEvent({ tabId: 1, sessionId: 'sess-A' }, 'Page.loadEventFired', { + timestamp: 123, + }); + + const expected: CdpEventFrame = { + method: 'Page.loadEventFired', + params: { timestamp: 123 }, + sessionId: 'sess-A', + }; + expect(received1).toEqual([expected]); + expect(received2).toEqual([expected]); + }); + + test('returns an unsubscribe that stops future deliveries', () => { + const api = createMockApi(); + const proxy = createCdpProxy(api); + + const received: CdpEventFrame[] = []; + const unsubscribe = proxy.onEvent((event) => received.push(event)); + + api.fireEvent({ tabId: 1 }, 'Page.loadEventFired', { phase: 'first' }); + expect(received.length).toBe(1); + + unsubscribe(); + + api.fireEvent({ tabId: 1 }, 'Page.loadEventFired', { phase: 'second' }); + // The unsubscribed handler must not have been called again. + expect(received.length).toBe(1); + }); + }); + + describe('dispose', () => { + test('removes the internal onEvent listener', () => { + const api = createMockApi(); + const proxy = createCdpProxy(api); + + // Capture the listener that the proxy registered at construction time. + expect(api.eventListeners.size).toBe(1); + const registered = Array.from(api.eventListeners)[0]; + + proxy.dispose(); + + // The proxy must call removeListener with the SAME callback reference + // that was registered — otherwise Chrome's listener bookkeeping leaks. 
+ expect(api.removedEventListeners).toContain(registered); + expect(api.eventListeners.size).toBe(0); + }); + }); +}); diff --git a/clients/chrome-extension/background/__tests__/cloud-auth.test.ts b/clients/chrome-extension/background/__tests__/cloud-auth.test.ts new file mode 100644 index 00000000000..f4cd705e63e --- /dev/null +++ b/clients/chrome-extension/background/__tests__/cloud-auth.test.ts @@ -0,0 +1,208 @@ +/** + * Tests for the cloud OAuth state machine. + * + * These tests mock the `chrome.identity.launchWebAuthFlow` and + * `chrome.storage.local` surfaces so they can run under bun:test without + * a real Chrome runtime. + */ + +import { describe, test, expect, beforeEach, afterEach } from 'bun:test'; + +import { + getStoredToken, + clearStoredToken, + signInCloud, + type CloudAuthConfig, + type StoredCloudToken, +} from '../cloud-auth.js'; + +const STORAGE_KEY = 'vellum.cloudAuthToken'; + +interface FakeStorage { + data: Record; + get(key: string | string[]): Promise>; + set(items: Record): Promise; + remove(key: string | string[]): Promise; +} + +function createFakeStorage(): FakeStorage { + const data: Record = {}; + return { + data, + async get(key) { + const keys = Array.isArray(key) ? key : [key]; + const result: Record = {}; + for (const k of keys) { + if (k in data) result[k] = data[k]; + } + return result; + }, + async set(items) { + Object.assign(data, items); + }, + async remove(key) { + const keys = Array.isArray(key) ? 
key : [key]; + for (const k of keys) delete data[k]; + }, + }; +} + +const originalChrome = (globalThis as { chrome?: unknown }).chrome; + +let fakeStorage: FakeStorage; +let launchWebAuthFlowImpl: (details: { url: string; interactive: boolean }) => Promise; + +beforeEach(() => { + fakeStorage = createFakeStorage(); + launchWebAuthFlowImpl = async () => undefined; + (globalThis as { chrome?: unknown }).chrome = { + storage: { + local: fakeStorage, + }, + identity: { + getRedirectURL: (path: string) => `https://fakeextid.chromiumapp.org/${path}`, + launchWebAuthFlow: (details: { url: string; interactive: boolean }) => launchWebAuthFlowImpl(details), + }, + }; +}); + +afterEach(() => { + (globalThis as { chrome?: unknown }).chrome = originalChrome; +}); + +const config: CloudAuthConfig = { + gatewayBaseUrl: 'https://api.vellum.ai', + clientId: 'test-client-id', +}; + +describe('signInCloud', () => { + test('happy path stores a token and returns it', async () => { + launchWebAuthFlowImpl = async (details) => { + // The redirect URL the gateway would send back. + expect(details.url).toContain('https://api.vellum.ai/oauth/chrome-extension/start'); + expect(details.url).toContain('client_id=test-client-id'); + expect(details.interactive).toBe(true); + return 'https://fakeextid.chromiumapp.org/cloud-auth#token=abc123&expires_in=3600&guardian_id=g-42'; + }; + + const before = Date.now(); + const result = await signInCloud(config); + const after = Date.now(); + + expect(result.token).toBe('abc123'); + expect(result.guardianId).toBe('g-42'); + expect(result.expiresAt).toBeGreaterThanOrEqual(before + 3600 * 1000); + expect(result.expiresAt).toBeLessThanOrEqual(after + 3600 * 1000); + + // Verify it was persisted. 
+ expect(fakeStorage.data[STORAGE_KEY]).toEqual(result); + }); + + test('missing token rejects with "incomplete payload"', async () => { + launchWebAuthFlowImpl = async () => + 'https://fakeextid.chromiumapp.org/cloud-auth#expires_in=3600&guardian_id=g-42'; + + await expect(signInCloud(config)).rejects.toThrow('incomplete payload'); + expect(fakeStorage.data[STORAGE_KEY]).toBeUndefined(); + }); + + test('missing expires_in rejects with "incomplete payload"', async () => { + launchWebAuthFlowImpl = async () => + 'https://fakeextid.chromiumapp.org/cloud-auth#token=abc123&guardian_id=g-42'; + + await expect(signInCloud(config)).rejects.toThrow('incomplete payload'); + }); + + test('missing guardian_id rejects with "incomplete payload"', async () => { + launchWebAuthFlowImpl = async () => + 'https://fakeextid.chromiumapp.org/cloud-auth#token=abc123&expires_in=3600'; + + await expect(signInCloud(config)).rejects.toThrow('incomplete payload'); + }); + + test('cancelled flow rejects with "cancelled"', async () => { + launchWebAuthFlowImpl = async () => undefined; + + await expect(signInCloud(config)).rejects.toThrow('cancelled'); + expect(fakeStorage.data[STORAGE_KEY]).toBeUndefined(); + }); + + test('trims trailing slash on gatewayBaseUrl', async () => { + let seenUrl = ''; + launchWebAuthFlowImpl = async (details) => { + seenUrl = details.url; + return 'https://fakeextid.chromiumapp.org/cloud-auth#token=abc&expires_in=60&guardian_id=g1'; + }; + await signInCloud({ gatewayBaseUrl: 'https://api.vellum.ai/', clientId: 'cid' }); + expect(seenUrl).toContain('https://api.vellum.ai/oauth/chrome-extension/start'); + expect(seenUrl).not.toContain('api.vellum.ai//oauth'); + }); +}); + +describe('getStoredToken', () => { + test('returns null when nothing is stored', async () => { + expect(await getStoredToken()).toBeNull(); + }); + + test('returns the stored token when valid', async () => { + const token: StoredCloudToken = { + token: 'valid-token', + expiresAt: Date.now() + 
60_000, + guardianId: 'g-1', + }; + fakeStorage.data[STORAGE_KEY] = token; + + expect(await getStoredToken()).toEqual(token); + }); + + test('returns null when the token is expired', async () => { + fakeStorage.data[STORAGE_KEY] = { + token: 'expired', + expiresAt: Date.now() - 1_000, + guardianId: 'g-1', + } satisfies StoredCloudToken; + + expect(await getStoredToken()).toBeNull(); + }); + + test('returns null when the stored value is malformed', async () => { + fakeStorage.data[STORAGE_KEY] = { token: 42, expiresAt: 'soon' }; + + expect(await getStoredToken()).toBeNull(); + }); + + test('returns null when guardianId is missing or non-string', async () => { + // Missing guardianId entirely — would otherwise render as "guardian:undefined" in the popup. + fakeStorage.data[STORAGE_KEY] = { + token: 'valid-token', + expiresAt: Date.now() + 60_000, + }; + expect(await getStoredToken()).toBeNull(); + + // Non-string guardianId (e.g. a number). + fakeStorage.data[STORAGE_KEY] = { + token: 'valid-token', + expiresAt: Date.now() + 60_000, + guardianId: 42, + }; + expect(await getStoredToken()).toBeNull(); + }); +}); + +describe('clearStoredToken', () => { + test('removes the key from storage', async () => { + fakeStorage.data[STORAGE_KEY] = { + token: 'to-clear', + expiresAt: Date.now() + 60_000, + guardianId: 'g-1', + } satisfies StoredCloudToken; + + await clearStoredToken(); + expect(fakeStorage.data[STORAGE_KEY]).toBeUndefined(); + }); + + test('is a no-op when nothing is stored', async () => { + await clearStoredToken(); + expect(fakeStorage.data[STORAGE_KEY]).toBeUndefined(); + }); +}); diff --git a/clients/chrome-extension/background/__tests__/host-browser-dispatcher.test.ts b/clients/chrome-extension/background/__tests__/host-browser-dispatcher.test.ts new file mode 100644 index 00000000000..a88e466c28b --- /dev/null +++ b/clients/chrome-extension/background/__tests__/host-browser-dispatcher.test.ts @@ -0,0 +1,668 @@ +/** + * Tests for the host_browser envelope 
dispatcher. + * + * Drives the dispatcher against an injected mock `CdpProxy` so we can + * exercise the happy path, CDP error envelopes, exception propagation, + * cancellation, and dispose without touching any real chrome.debugger or + * WebSocket surface. + */ + +import { describe, test, expect, beforeEach } from 'bun:test'; + +import { + createHostBrowserDispatcher, + type HostBrowserDispatcher, + type HostBrowserRequestEnvelope, + type HostBrowserCancelEnvelope, + type HostBrowserResultEnvelope, +} from '../host-browser-dispatcher.js'; +import type { + CdpProxy, + CdpRequestFrame, + CdpResultFrame, + CdpEventFrame, + CdpTarget, + CdpDebuggee, +} from '../cdp-proxy.js'; + +// ── Test fixtures ─────────────────────────────────────────────────── + +interface MockCdpProxyOptions { + /** Optional override for the next `send()` call's resolved frame. */ + sendResult?: CdpResultFrame; + /** + * Optional FIFO queue of canned `send()` results. Each call to `send()` + * shifts the head of this queue and returns it. Falls back to + * `sendResult` (or the default `{ id, result: { ok: true } }`) once the + * queue is empty. Useful for tests that need to sequence multiple + * different responses across repeat requests. + */ + sendResults?: CdpResultFrame[]; + /** If set, the next `send()` call will throw this error. */ + sendThrows?: Error; + /** If set, `attach()` will reject with this error. */ + attachThrows?: Error; +} + +interface MockCdpProxy extends CdpProxy { + attachCalls: Array<{ target: CdpTarget; requiredVersion: string }>; + sendCalls: Array<{ target: CdpTarget; frame: CdpRequestFrame }>; + detachCalls: CdpTarget[]; + disposeCalls: number; + /** + * Currently-registered onDetach handlers. Tests fire detach events by + * calling these directly via the `fireDetach` helper below. + */ + detachHandlers: Set<(target: CdpDebuggee, reason: string) => void>; + /** Synthetically dispatch a detach event to all registered handlers. 
*/ + fireDetach(target: CdpDebuggee, reason?: string): void; +} + +function createMockCdpProxy(options: MockCdpProxyOptions = {}): MockCdpProxy { + const eventHandlers = new Set<(event: CdpEventFrame) => void>(); + const detachHandlers = new Set<(target: CdpDebuggee, reason: string) => void>(); + const attachCalls: Array<{ target: CdpTarget; requiredVersion: string }> = []; + const sendCalls: Array<{ target: CdpTarget; frame: CdpRequestFrame }> = []; + const detachCalls: CdpTarget[] = []; + let disposeCalls = 0; + // Mutable copy so each `send()` invocation can shift one off the front. + const queuedSendResults: CdpResultFrame[] = options.sendResults + ? [...options.sendResults] + : []; + + const proxy: MockCdpProxy = { + attachCalls, + sendCalls, + detachCalls, + detachHandlers, + get disposeCalls() { + return disposeCalls; + }, + async attach(target, requiredVersion) { + attachCalls.push({ target, requiredVersion }); + if (options.attachThrows) throw options.attachThrows; + }, + async detach(target) { + detachCalls.push(target); + }, + async send(target, frame) { + sendCalls.push({ target, frame }); + if (options.sendThrows) throw options.sendThrows; + const queued = queuedSendResults.shift(); + if (queued) { + // Re-tag the queued frame's id with the actual request id so the + // dispatcher's monotonic counter doesn't drift in the test view. + return { ...queued, id: frame.id }; + } + return options.sendResult ?? 
{ id: frame.id, result: { ok: true } }; + }, + onEvent(handler) { + eventHandlers.add(handler); + return () => eventHandlers.delete(handler); + }, + onDetach(handler) { + detachHandlers.add(handler); + return () => detachHandlers.delete(handler); + }, + fireDetach(target, reason = 'target_closed') { + for (const h of detachHandlers) h(target, reason); + }, + dispose() { + disposeCalls += 1; + eventHandlers.clear(); + detachHandlers.clear(); + }, + }; + return proxy; +} + +interface DispatcherTestHarness { + dispatcher: HostBrowserDispatcher; + proxy: MockCdpProxy; + results: HostBrowserResultEnvelope[]; + resolveTargetCalls: Array; + /** Override this to throw from resolveTarget. */ + resolveTargetImpl: ( + cdpSessionId: string | undefined, + ) => Promise<{ tabId?: number; targetId?: string }>; + /** Override this to throw from postResult. */ + postResultImpl: (result: HostBrowserResultEnvelope) => Promise; +} + +function createHarness(options: MockCdpProxyOptions = {}): DispatcherTestHarness { + const proxy = createMockCdpProxy(options); + const results: HostBrowserResultEnvelope[] = []; + const resolveTargetCalls: Array = []; + + const harness: DispatcherTestHarness = { + dispatcher: null as unknown as HostBrowserDispatcher, + proxy, + results, + resolveTargetCalls, + resolveTargetImpl: async (cdpSessionId) => { + if (cdpSessionId) return { targetId: cdpSessionId }; + return { tabId: 42 }; + }, + postResultImpl: async (result) => { + results.push(result); + }, + }; + + harness.dispatcher = createHostBrowserDispatcher({ + cdpProxy: proxy, + resolveTarget: async (cdpSessionId) => { + resolveTargetCalls.push(cdpSessionId); + return harness.resolveTargetImpl(cdpSessionId); + }, + postResult: async (result) => { + await harness.postResultImpl(result); + }, + }); + + return harness; +} + +const sampleRequest: HostBrowserRequestEnvelope = { + type: 'host_browser_request', + requestId: 'req-1', + conversationId: 'conv-1', + cdpMethod: 'Browser.getVersion', + cdpParams: { 
foo: 'bar' }, +}; + +// ── Tests ─────────────────────────────────────────────────────────── + +describe('createHostBrowserDispatcher', () => { + let harness: DispatcherTestHarness; + + beforeEach(() => { + harness = createHarness(); + }); + + describe('handle — happy path', () => { + test('attaches, sends CDP command, and posts a success result', async () => { + harness = createHarness({ + sendResult: { + id: 1, + result: { product: 'Chrome/120', protocolVersion: '1.3' }, + }, + }); + + await harness.dispatcher.handle(sampleRequest); + + // resolveTarget was called once with no session id → active tab. + expect(harness.resolveTargetCalls).toEqual([undefined]); + + // Proxy attach + send happened with the resolved target. + expect(harness.proxy.attachCalls.length).toBe(1); + expect(harness.proxy.attachCalls[0].target).toEqual({ tabId: 42 }); + expect(harness.proxy.attachCalls[0].requiredVersion).toBe('1.3'); + + expect(harness.proxy.sendCalls.length).toBe(1); + expect(harness.proxy.sendCalls[0].target).toEqual({ tabId: 42 }); + expect(harness.proxy.sendCalls[0].frame.method).toBe('Browser.getVersion'); + expect(harness.proxy.sendCalls[0].frame.params).toEqual({ foo: 'bar' }); + + // A single success result was posted with the stringified CDP result. 
+ expect(harness.results.length).toBe(1); + expect(harness.results[0].requestId).toBe('req-1'); + expect(harness.results[0].isError).toBe(false); + expect(harness.results[0].content).toBe( + JSON.stringify({ product: 'Chrome/120', protocolVersion: '1.3' }), + ); + }); + + test('routes via targetId when cdpSessionId is provided', async () => { + harness = createHarness({ + sendResult: { id: 1, result: {} }, + }); + + const withSession: HostBrowserRequestEnvelope = { + ...sampleRequest, + cdpSessionId: 'target-xyz', + }; + await harness.dispatcher.handle(withSession); + + expect(harness.resolveTargetCalls).toEqual(['target-xyz']); + expect(harness.proxy.attachCalls[0].target).toEqual({ targetId: 'target-xyz' }); + expect(harness.proxy.sendCalls[0].frame.sessionId).toBe('target-xyz'); + }); + }); + + describe('handle — attach deduplication', () => { + test('skips proxy.attach on repeat requests against the same target', async () => { + harness = createHarness({ + sendResult: { id: 1, result: {} }, + }); + + await harness.dispatcher.handle(sampleRequest); + await harness.dispatcher.handle({ ...sampleRequest, requestId: 'req-2' }); + await harness.dispatcher.handle({ ...sampleRequest, requestId: 'req-3' }); + + // Only the first request should have attached; the subsequent two + // reuse the cached attachment. + expect(harness.proxy.attachCalls.length).toBe(1); + expect(harness.proxy.sendCalls.length).toBe(3); + expect(harness.results.length).toBe(3); + expect(harness.results.every((r) => r.isError === false)).toBe(true); + }); + + test('tolerates "Already attached" errors from proxy.attach and caches success', async () => { + harness = createHarness({ + attachThrows: new Error( + 'Another debugger is already attached to the tab with id: 42.', + ), + }); + + await harness.dispatcher.handle(sampleRequest); + + // Send proceeded despite the attach error — the dispatcher treated + // "Already attached" as a non-fatal success. 
+ expect(harness.proxy.attachCalls.length).toBe(1); + expect(harness.proxy.sendCalls.length).toBe(1); + expect(harness.results.length).toBe(1); + expect(harness.results[0].isError).toBe(false); + }); + + test('routes different targetIds to distinct attach entries', async () => { + harness = createHarness({ sendResult: { id: 1, result: {} } }); + + await harness.dispatcher.handle({ + ...sampleRequest, + cdpSessionId: 'target-A', + }); + await harness.dispatcher.handle({ + ...sampleRequest, + requestId: 'req-2', + cdpSessionId: 'target-B', + }); + // Second call to target-A should reuse the cached attachment. + await harness.dispatcher.handle({ + ...sampleRequest, + requestId: 'req-3', + cdpSessionId: 'target-A', + }); + + expect(harness.proxy.attachCalls.length).toBe(2); + expect(harness.proxy.attachCalls[0].target).toEqual({ targetId: 'target-A' }); + expect(harness.proxy.attachCalls[1].target).toEqual({ targetId: 'target-B' }); + }); + }); + + describe('handle — onDetach cache invalidation', () => { + test('re-attaches after Chrome fires onDetach for a tabId target', async () => { + harness = createHarness({ sendResult: { id: 1, result: {} } }); + + // First call attaches. + await harness.dispatcher.handle(sampleRequest); + expect(harness.proxy.attachCalls.length).toBe(1); + expect(harness.proxy.attachCalls[0].target).toEqual({ tabId: 42 }); + + // Second call (no detach yet) reuses the cached attachment — proves + // the entry is in the cache. + await harness.dispatcher.handle({ ...sampleRequest, requestId: 'req-2' }); + expect(harness.proxy.attachCalls.length).toBe(1); + + // Chrome fires onDetach for the tab — e.g. user closed it, navigated + // away, clicked Cancel on the chrome.debugger infobar, or another + // debugger took over via Target.attachToTarget. + harness.proxy.fireDetach({ tabId: 42 }, 'target_closed'); + + // Next call must re-attach because the cache entry was invalidated. 
+ // Otherwise we'd silently send a CDP command against a torn-down + // session and hit a permanent failure. + await harness.dispatcher.handle({ ...sampleRequest, requestId: 'req-3' }); + expect(harness.proxy.attachCalls.length).toBe(2); + expect(harness.proxy.attachCalls[1].target).toEqual({ tabId: 42 }); + }); + + test('re-attaches after Chrome fires onDetach for a targetId target', async () => { + harness = createHarness({ sendResult: { id: 1, result: {} } }); + + const withSession: HostBrowserRequestEnvelope = { + ...sampleRequest, + cdpSessionId: 'target-xyz', + }; + + await harness.dispatcher.handle(withSession); + expect(harness.proxy.attachCalls.length).toBe(1); + + // Cache hit — second call must NOT re-attach. + await harness.dispatcher.handle({ ...withSession, requestId: 'req-2' }); + expect(harness.proxy.attachCalls.length).toBe(1); + + harness.proxy.fireDetach({ targetId: 'target-xyz' }, 'target_closed'); + + await harness.dispatcher.handle({ ...withSession, requestId: 'req-3' }); + expect(harness.proxy.attachCalls.length).toBe(2); + expect(harness.proxy.attachCalls[1].target).toEqual({ + targetId: 'target-xyz', + }); + }); + + test('detach for an unrelated target does not invalidate other entries', async () => { + harness = createHarness({ sendResult: { id: 1, result: {} } }); + + // Attach two distinct targets. + await harness.dispatcher.handle({ + ...sampleRequest, + cdpSessionId: 'target-A', + }); + await harness.dispatcher.handle({ + ...sampleRequest, + requestId: 'req-2', + cdpSessionId: 'target-B', + }); + expect(harness.proxy.attachCalls.length).toBe(2); + + // Detach only target-A. target-B's cached attachment must survive. + harness.proxy.fireDetach({ targetId: 'target-A' }, 'target_closed'); + + await harness.dispatcher.handle({ + ...sampleRequest, + requestId: 'req-3', + cdpSessionId: 'target-B', + }); + // No new attach for target-B. + expect(harness.proxy.attachCalls.length).toBe(2); + + // But target-A re-attaches. 
+ await harness.dispatcher.handle({ + ...sampleRequest, + requestId: 'req-4', + cdpSessionId: 'target-A', + }); + expect(harness.proxy.attachCalls.length).toBe(3); + expect(harness.proxy.attachCalls[2].target).toEqual({ targetId: 'target-A' }); + }); + + test('detach for a debuggee shape with neither tabId nor targetId is a no-op', async () => { + harness = createHarness({ sendResult: { id: 1, result: {} } }); + + await harness.dispatcher.handle(sampleRequest); + expect(harness.proxy.attachCalls.length).toBe(1); + + // Defensive: a malformed detach payload (e.g. extensionId-only) must + // not throw and must not invalidate anything we care about. + harness.proxy.fireDetach({}, 'target_closed'); + + // Cache entry for tabId 42 is still there → no new attach. + await harness.dispatcher.handle({ ...sampleRequest, requestId: 'req-2' }); + expect(harness.proxy.attachCalls.length).toBe(1); + }); + }); + + describe('handle — send-error cache eviction', () => { + test('evicts the cache when send returns a detach-style error so the next request re-attaches', async () => { + // Two requests against the same target. The first send returns a + // "Target closed" error frame; the dispatcher must surface that + // error to the caller AND evict the cached attach so the second + // request re-runs proxy.attach instead of silently re-using a + // dead session. + harness = createHarness({ + sendResults: [ + { id: 0, error: { code: -32000, message: 'Target closed' } }, + { id: 0, result: { ok: true } }, + ], + }); + + await harness.dispatcher.handle(sampleRequest); + await harness.dispatcher.handle({ ...sampleRequest, requestId: 'req-2' }); + + // Two attaches: one before the first request, one before the second + // after the cache was evicted by the detach-style error response. 
+ expect(harness.proxy.attachCalls.length).toBe(2); + expect(harness.proxy.attachCalls[0].target).toEqual({ tabId: 42 }); + expect(harness.proxy.attachCalls[1].target).toEqual({ tabId: 42 }); + + // Both sends fired against the same resolved target. + expect(harness.proxy.sendCalls.length).toBe(2); + + // The first request still surfaces the error frame to the caller — + // eviction is a recovery hint, not a retry. The second succeeds. + expect(harness.results.length).toBe(2); + expect(harness.results[0].isError).toBe(true); + expect(harness.results[0].content).toBe( + JSON.stringify({ code: -32000, message: 'Target closed' }), + ); + expect(harness.results[1].isError).toBe(false); + }); + + test('does not evict the cache when send returns a non-detach error', async () => { + // A "Method not implemented" failure is unrelated to the attach + // lifecycle — re-attaching wouldn't help and would be wasteful. + // The dispatcher must keep the cache entry intact and the next + // request must reuse the cached attach. + harness = createHarness({ + sendResults: [ + { + id: 0, + error: { code: -32601, message: 'Method not implemented' }, + }, + { id: 0, result: { ok: true } }, + ], + }); + + await harness.dispatcher.handle(sampleRequest); + await harness.dispatcher.handle({ ...sampleRequest, requestId: 'req-2' }); + + // Only one attach: the cache survived the non-detach error. 
+ expect(harness.proxy.attachCalls.length).toBe(1); + expect(harness.proxy.sendCalls.length).toBe(2); + + expect(harness.results.length).toBe(2); + expect(harness.results[0].isError).toBe(true); + expect(harness.results[0].content).toBe( + JSON.stringify({ code: -32601, message: 'Method not implemented' }), + ); + expect(harness.results[1].isError).toBe(false); + }); + }); + + describe('handle — CDP error envelope', () => { + test('posts isError: true with the stringified error object', async () => { + harness = createHarness({ + sendResult: { + id: 1, + error: { code: -32000, message: 'cannot find context with specified id' }, + }, + }); + + await harness.dispatcher.handle(sampleRequest); + + expect(harness.results.length).toBe(1); + expect(harness.results[0].isError).toBe(true); + expect(harness.results[0].content).toBe( + JSON.stringify({ code: -32000, message: 'cannot find context with specified id' }), + ); + }); + }); + + describe('handle — exception path', () => { + test('posts isError: true when resolveTarget throws', async () => { + harness.resolveTargetImpl = async () => { + throw new Error('no active tab'); + }; + + await harness.dispatcher.handle(sampleRequest); + + expect(harness.proxy.attachCalls.length).toBe(0); + expect(harness.proxy.sendCalls.length).toBe(0); + expect(harness.results.length).toBe(1); + expect(harness.results[0].isError).toBe(true); + expect(harness.results[0].content).toBe('no active tab'); + expect(harness.results[0].requestId).toBe('req-1'); + }); + + test('posts isError: true when proxy.attach throws a non-"Already attached" error', async () => { + harness = createHarness({ + attachThrows: new Error('Cannot access a chrome:// URL'), + }); + + await harness.dispatcher.handle(sampleRequest); + + expect(harness.results.length).toBe(1); + expect(harness.results[0].isError).toBe(true); + expect(harness.results[0].content).toBe('Cannot access a chrome:// URL'); + }); + + test('posts isError: true when proxy.send throws', async () => { 
+ harness = createHarness({ + sendThrows: new Error('debugger detached mid-command'), + }); + + await harness.dispatcher.handle(sampleRequest); + + expect(harness.results.length).toBe(1); + expect(harness.results[0].isError).toBe(true); + expect(harness.results[0].content).toBe('debugger detached mid-command'); + }); + + test('stringifies non-Error thrown values', async () => { + harness.resolveTargetImpl = async () => { + // eslint-disable-next-line no-throw-literal + throw 'raw string rejection'; + }; + + await harness.dispatcher.handle(sampleRequest); + + expect(harness.results.length).toBe(1); + expect(harness.results[0].isError).toBe(true); + expect(harness.results[0].content).toBe('raw string rejection'); + }); + + test('swallows postResult failures inside the catch handler (no unhandled rejection)', async () => { + // Force the handler into the error path AND make postResult itself + // throw. If the dispatcher does not guard the catch-block postResult, + // this rejection will escape and trip `handle()`. + harness = createHarness({ + sendThrows: new Error('boom from send'), + }); + let postResultCalls = 0; + harness.postResultImpl = async () => { + postResultCalls += 1; + throw new Error('relay socket torn down'); + }; + + // Must not reject. + let rejected: unknown = null; + try { + await harness.dispatcher.handle(sampleRequest); + } catch (err) { + rejected = err; + } + expect(rejected).toBeNull(); + + // We still attempted to post the error envelope once. + expect(postResultCalls).toBe(1); + }); + }); + + describe('cancel', () => { + test('aborts the in-flight controller for the matching request id', async () => { + // Gate resolveTarget on an externally-controllable promise so we can + // issue a cancel while the handler is still mid-flight. 
+ let releaseResolve: () => void = () => {}; + const gate = new Promise<void>((resolve) => { + releaseResolve = resolve; + }); + + harness.resolveTargetImpl = async () => { + await gate; + return { tabId: 7 }; + }; + + const handlePromise = harness.dispatcher.handle(sampleRequest); + + // Mid-flight cancel. + const cancelEnvelope: HostBrowserCancelEnvelope = { + type: 'host_browser_cancel', + requestId: 'req-1', + }; + harness.dispatcher.cancel(cancelEnvelope); + + // Release the gate so the handler can run to completion. + releaseResolve(); + await handlePromise; + + // Handler still ran to completion and posted a result — the dispatcher + // does not early-return on cancel; instead it removes the abort + // controller from the in-flight map. This matches the plan's acceptance + // criteria for the "cancel aborts the in-flight controller" test. + expect(harness.results.length).toBe(1); + }); + + test('is a no-op for unknown request ids', () => { + expect(() => + harness.dispatcher.cancel({ + type: 'host_browser_cancel', + requestId: 'unknown', + }), + ).not.toThrow(); + }); + }); + + describe('dispose', () => { + test('disposes the CDP proxy and clears any in-flight state', async () => { + // Start a long-running request so there's something in the in-flight map. + let releaseResolve: () => void = () => {}; + const gate = new Promise<void>((resolve) => { + releaseResolve = resolve; + }); + harness.resolveTargetImpl = async () => { + await gate; + return { tabId: 1 }; + }; + + const pending = harness.dispatcher.handle(sampleRequest); + + // Dispose the dispatcher — this should dispose the CDP proxy and abort + // the in-flight controller. + harness.dispatcher.dispose(); + expect(harness.proxy.disposeCalls).toBe(1); + + // Release the gate so the awaited Promise can settle. 
+ releaseResolve(); + await pending; + }); + + test('is safe to call multiple times (proxy is disposed each time)', () => { + harness.dispatcher.dispose(); + harness.dispatcher.dispose(); + expect(harness.proxy.disposeCalls).toBe(2); + }); + + test('clears attached-target cache so the next attach happens fresh', async () => { + harness = createHarness({ sendResult: { id: 1, result: {} } }); + + // Attach once. + await harness.dispatcher.handle(sampleRequest); + expect(harness.proxy.attachCalls.length).toBe(1); + + // Dispose clears the attached set (and the proxy). + harness.dispatcher.dispose(); + + // A new dispatcher built on a *fresh* proxy should attach again on + // first use — we can't reuse the disposed dispatcher, so this test + // verifies the semantic by starting over. + harness = createHarness({ sendResult: { id: 1, result: {} } }); + await harness.dispatcher.handle(sampleRequest); + expect(harness.proxy.attachCalls.length).toBe(1); + }); + + test('unsubscribes the onDetach handler', async () => { + harness = createHarness({ sendResult: { id: 1, result: {} } }); + + // Subscribing happens at construction time. The mock proxy exposes + // its handler set so we can directly observe registration/teardown. + expect(harness.proxy.detachHandlers.size).toBe(1); + + harness.dispatcher.dispose(); + + // After dispose the dispatcher must release its detach handler so + // the proxy isn't left holding a stale closure that references the + // disposed dispatcher's `attachedTargets` set. + expect(harness.proxy.detachHandlers.size).toBe(0); + }); + }); +}); diff --git a/clients/chrome-extension/background/__tests__/relay-connection.test.ts b/clients/chrome-extension/background/__tests__/relay-connection.test.ts new file mode 100644 index 00000000000..9d65970de40 --- /dev/null +++ b/clients/chrome-extension/background/__tests__/relay-connection.test.ts @@ -0,0 +1,567 @@ +/** + * Tests for the RelayConnection helper. 
+ * + * Drives the class against a fake global WebSocket so we can exercise + * the open/message/close/reconnect lifecycle without touching a real + * socket. Covers both self-hosted and cloud modes and the caller-close + * vs unexpected-close branches. + */ + +import { describe, test, expect, beforeEach, afterEach } from 'bun:test'; + +import { RelayConnection, type RelayMode } from '../relay-connection.js'; + +// ── Fake WebSocket ────────────────────────────────────────────────── + +type WsListener = (ev: { data?: unknown; code?: number; reason?: string }) => void; + +interface FakeWebSocket { + url: string; + readyState: number; + listeners: Map<string, Set<WsListener>>; + sent: string[]; + close: (code?: number, reason?: string) => void; + send: (data: string) => void; + addEventListener: (type: string, listener: WsListener) => void; + removeEventListener: (type: string, listener: WsListener) => void; + dispatch: (type: string, ev: { data?: unknown; code?: number; reason?: string }) => void; + /** Track whether close() was called by the helper (caller-side) */ + closeCallsByCaller: Array<{ code?: number; reason?: string }>; +} + +let instances: FakeWebSocket[] = []; + +function makeFakeWebSocket(url: string): FakeWebSocket { + const listeners = new Map<string, Set<WsListener>>(); + const sent: string[] = []; + const closeCallsByCaller: Array<{ code?: number; reason?: string }> = []; + const ws: FakeWebSocket = { + url, + readyState: 0, // CONNECTING + listeners, + sent, + closeCallsByCaller, + close(code, reason) { + closeCallsByCaller.push({ code, reason }); + ws.readyState = 3; // CLOSED + }, + send(data) { + sent.push(data); + }, + addEventListener(type, listener) { + if (!listeners.has(type)) listeners.set(type, new Set()); + listeners.get(type)!.add(listener); + }, + removeEventListener(type, listener) { + listeners.get(type)?.delete(listener); + }, + dispatch(type, ev) { + const set = listeners.get(type); + if (!set) return; + for (const l of set) l(ev); + }, + }; + return ws; +} + +// Mimic the 
WebSocket.OPEN etc. static constants used by the class. +function installFakeWebSocket(): void { + instances = []; + const FakeCtor = function (this: unknown, url: string) { + const instance = makeFakeWebSocket(url); + instances.push(instance); + return instance as unknown as WebSocket; + } as unknown as typeof WebSocket; + (FakeCtor as unknown as { CONNECTING: number }).CONNECTING = 0; + (FakeCtor as unknown as { OPEN: number }).OPEN = 1; + (FakeCtor as unknown as { CLOSING: number }).CLOSING = 2; + (FakeCtor as unknown as { CLOSED: number }).CLOSED = 3; + (globalThis as unknown as { WebSocket: typeof WebSocket }).WebSocket = FakeCtor; +} + +const originalWebSocket = (globalThis as unknown as { WebSocket?: typeof WebSocket }).WebSocket; + +beforeEach(() => { + installFakeWebSocket(); +}); + +afterEach(() => { + (globalThis as unknown as { WebSocket?: typeof WebSocket }).WebSocket = originalWebSocket; +}); + +/** Walk the fake-ws instance into the OPEN state and fire the open event. */ +function openSocket(ws: FakeWebSocket): void { + ws.readyState = 1; + ws.dispatch('open', {}); +} + +/** Fire a close event as if the server kicked us. 
*/ +function closeSocket(ws: FakeWebSocket, code = 1006, reason = 'abnormal'): void { + ws.readyState = 3; + ws.dispatch('close', { code, reason }); +} + +// ── Harness ───────────────────────────────────────────────────────── + +interface Callbacks { + openCalls: number; + closeCalls: Array<{ code: number; reason: string }>; + messages: string[]; +} + +function makeCallbacks(): Callbacks { + return { openCalls: 0, closeCalls: [], messages: [] }; +} + +function makeConn(mode: RelayMode, callbacks: Callbacks, onReconnect?: () => Promise<string | void>): RelayConnection { + return new RelayConnection({ + mode, + onOpen: () => { + callbacks.openCalls += 1; + }, + onMessage: (data) => { + callbacks.messages.push(data); + }, + onClose: (code, reason) => { + callbacks.closeCalls.push({ code, reason }); + }, + onReconnect, + }); +} + +// ── Tests ─────────────────────────────────────────────────────────── + +describe('RelayConnection', () => { + describe('start', () => { + test('opens a self-hosted WebSocket to the expected URL', () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { + kind: 'self-hosted', + baseUrl: 'http://127.0.0.1:7830', + token: 'local-token-abc', + }, + cbs, + ); + + conn.start(); + + expect(instances.length).toBe(1); + expect(instances[0].url).toBe( + 'ws://127.0.0.1:7830/v1/browser-relay?token=local-token-abc', + ); + + openSocket(instances[0]); + expect(cbs.openCalls).toBe(1); + expect(conn.isOpen()).toBe(true); + }); + + test('opens a cloud WebSocket to the expected wss URL', () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { + kind: 'cloud', + baseUrl: 'https://api.vellum.ai', + token: 'cloud-jwt-xyz', + }, + cbs, + ); + + conn.start(); + + expect(instances.length).toBe(1); + expect(instances[0].url).toBe( + 'wss://api.vellum.ai/v1/browser-relay?token=cloud-jwt-xyz', + ); + }); + + test('URL-encodes special characters in the token', () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { + kind: 'cloud', + baseUrl: 
'https://api.vellum.ai/', + token: 'a b+c/d=', + }, + cbs, + ); + + conn.start(); + + expect(instances.length).toBe(1); + expect(instances[0].url).toBe( + 'wss://api.vellum.ai/v1/browser-relay?token=a%20b%2Bc%2Fd%3D', + ); + }); + + test('strips a trailing slash on the base URL', () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { + kind: 'cloud', + baseUrl: 'https://api.vellum.ai/', + token: 'tok', + }, + cbs, + ); + + conn.start(); + + expect(instances[0].url).not.toContain('ai//'); + expect(instances[0].url).toBe('wss://api.vellum.ai/v1/browser-relay?token=tok'); + }); + + test('omits the token query param when the caller passes null', () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { + kind: 'self-hosted', + baseUrl: 'http://127.0.0.1:7830', + token: null, + }, + cbs, + ); + + conn.start(); + + expect(instances[0].url).toBe('ws://127.0.0.1:7830/v1/browser-relay'); + }); + }); + + describe('onMessage', () => { + test('forwards incoming messages to the caller', () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { kind: 'self-hosted', baseUrl: 'http://127.0.0.1:7830', token: 't' }, + cbs, + ); + + conn.start(); + openSocket(instances[0]); + instances[0].dispatch('message', { data: 'hello-from-daemon' }); + instances[0].dispatch('message', { data: 'second' }); + + expect(cbs.messages).toEqual(['hello-from-daemon', 'second']); + }); + + test('stringifies non-string event data (belt-and-suspenders)', () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { kind: 'self-hosted', baseUrl: 'http://127.0.0.1:7830', token: 't' }, + cbs, + ); + + conn.start(); + openSocket(instances[0]); + // Simulate a binary frame that arrived as a Blob-like object; the + // class uses String(ev.data) to keep the callback signature simple. 
+ instances[0].dispatch('message', { data: 42 }); + + expect(cbs.messages).toEqual(['42']); + }); + }); + + describe('close', () => { + test('caller-close prevents reconnect on a subsequent unexpected close', async () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { kind: 'self-hosted', baseUrl: 'http://127.0.0.1:7830', token: 't' }, + cbs, + ); + + conn.start(); + openSocket(instances[0]); + expect(instances.length).toBe(1); + + conn.close(); + // close() synchronously calls the underlying ws.close — the fake + // dispatches the close event manually only on explicit dispatch. + closeSocket(instances[0], 1000, 'caller closed'); + + // Wait a tick to be sure any stray setTimeout didn't enqueue a + // reconnect. + await new Promise((r) => setTimeout(r, 5)); + expect(instances.length).toBe(1); + }); + + test('marks closedByCaller so isOpen returns false', () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { kind: 'self-hosted', baseUrl: 'http://127.0.0.1:7830', token: 't' }, + cbs, + ); + + conn.start(); + openSocket(instances[0]); + expect(conn.isOpen()).toBe(true); + conn.close(); + expect(conn.isOpen()).toBe(false); + }); + + test('close with code 1000 on the helper forwards to the socket', () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { kind: 'self-hosted', baseUrl: 'http://127.0.0.1:7830', token: 't' }, + cbs, + ); + + conn.start(); + openSocket(instances[0]); + conn.close(1000, 'bye'); + + expect(instances[0].closeCallsByCaller.length).toBe(1); + expect(instances[0].closeCallsByCaller[0].code).toBe(1000); + expect(instances[0].closeCallsByCaller[0].reason).toBe('bye'); + }); + }); + + describe('reconnect', () => { + test('unexpected close triggers reconnect after a delay', async () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { kind: 'self-hosted', baseUrl: 'http://127.0.0.1:7830', token: 't' }, + cbs, + ); + + conn.start(); + openSocket(instances[0]); + expect(instances.length).toBe(1); + + // 
Server-side abnormal close (e.g. the daemon restarted). + closeSocket(instances[0], 1006, 'abnormal'); + + expect(cbs.closeCalls.length).toBe(1); + expect(cbs.closeCalls[0].code).toBe(1006); + + // The reconnect is scheduled behind a real setTimeout — wait long + // enough for it to fire. The base delay is 1000ms; we tolerate + // some scheduling jitter. + await new Promise((r) => setTimeout(r, 1100)); + + expect(instances.length).toBe(2); + expect(instances[1].url).toBe( + 'ws://127.0.0.1:7830/v1/browser-relay?token=t', + ); + + // Clean up. + conn.close(); + }); + + test('normal close (code 1000) does NOT call onReconnect', async () => { + const cbs = makeCallbacks(); + let reconnectCalls = 0; + const conn = makeConn( + { kind: 'self-hosted', baseUrl: 'http://127.0.0.1:7830', token: 't' }, + cbs, + async () => { + reconnectCalls += 1; + return 'new-token'; + }, + ); + + conn.start(); + openSocket(instances[0]); + + // Normal close — should still schedule a reconnect but without + // calling the refresh hook. 
+ closeSocket(instances[0], 1000, 'normal'); + await new Promise((r) => setTimeout(r, 1100)); + + expect(reconnectCalls).toBe(0); + expect(instances.length).toBe(2); + + conn.close(); + }); + + test('onReconnect replaces the token used for the next URL', async () => { + const cbs = makeCallbacks(); + let refreshCalls = 0; + const conn = makeConn( + { kind: 'self-hosted', baseUrl: 'http://127.0.0.1:7830', token: 'old' }, + cbs, + async () => { + refreshCalls += 1; + return 'fresh-token'; + }, + ); + + conn.start(); + openSocket(instances[0]); + expect(instances[0].url).toContain('token=old'); + + closeSocket(instances[0], 4001, 'auth rotated'); + await new Promise((r) => setTimeout(r, 1100)); + + expect(refreshCalls).toBe(1); + expect(instances.length).toBe(2); + expect(instances[1].url).toContain('token=fresh-token'); + + conn.close(); + }); + + test('onReconnect returning void leaves the existing token in place', async () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { kind: 'self-hosted', baseUrl: 'http://127.0.0.1:7830', token: 'keep' }, + cbs, + async () => { + // no return → void + }, + ); + + conn.start(); + openSocket(instances[0]); + closeSocket(instances[0], 1006, 'abnormal'); + await new Promise((r) => setTimeout(r, 1100)); + + expect(instances.length).toBe(2); + expect(instances[1].url).toContain('token=keep'); + + conn.close(); + }); + + test('close called before scheduled reconnect fires prevents reconnection', async () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { kind: 'self-hosted', baseUrl: 'http://127.0.0.1:7830', token: 't' }, + cbs, + ); + + conn.start(); + openSocket(instances[0]); + closeSocket(instances[0], 1006, 'abnormal'); + + // Cancel before the reconnect timer fires. + conn.close(); + + await new Promise((r) => setTimeout(r, 1100)); + + // No second socket should have been constructed. 
+ expect(instances.length).toBe(1); + }); + }); + + describe('setMode', () => { + test('closes the current socket and opens a new one for the new mode', () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { kind: 'self-hosted', baseUrl: 'http://127.0.0.1:7830', token: 't' }, + cbs, + ); + + conn.start(); + openSocket(instances[0]); + expect(instances.length).toBe(1); + expect(instances[0].url).toContain('ws://127.0.0.1'); + + conn.setMode({ kind: 'cloud', baseUrl: 'https://api.vellum.ai', token: 'cloud-jwt' }); + + // Old socket was closed by the caller; new one was constructed + // against the cloud URL. + expect(instances[0].closeCallsByCaller.length).toBe(1); + expect(instances.length).toBe(2); + expect(instances[1].url).toBe( + 'wss://api.vellum.ai/v1/browser-relay?token=cloud-jwt', + ); + + conn.close(); + }); + + test('updates the mode accessor', () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { kind: 'self-hosted', baseUrl: 'http://127.0.0.1:7830', token: 't' }, + cbs, + ); + conn.start(); + expect(conn.getCurrentMode().kind).toBe('self-hosted'); + + conn.setMode({ kind: 'cloud', baseUrl: 'https://api.vellum.ai', token: 'c' }); + expect(conn.getCurrentMode().kind).toBe('cloud'); + + conn.close(); + }); + + test('stale close event from a superseded socket does not clear the new ws or schedule reconnect', async () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { kind: 'self-hosted', baseUrl: 'http://127.0.0.1:7830', token: 't' }, + cbs, + ); + + conn.start(); + openSocket(instances[0]); + expect(instances.length).toBe(1); + const oldSocket = instances[0]; + + // Switch modes mid-flight: the helper closes socket A (oldSocket) + // and constructs socket B (newSocket) for the cloud gateway. We + // keep newSocket in CONNECTING so we can observe the state that + // would be disturbed by a stale close event. 
+ conn.setMode({ + kind: 'cloud', + baseUrl: 'https://api.vellum.ai', + token: 'cloud-jwt', + }); + expect(instances.length).toBe(2); + const newSocket = instances[1]; + expect(newSocket.url).toBe( + 'wss://api.vellum.ai/v1/browser-relay?token=cloud-jwt', + ); + expect(conn.getCurrentMode().kind).toBe('cloud'); + + // Now simulate the asynchronous close event that socket A fires + // after setMode already re-pointed this.ws at socket B. The + // helper should ignore it entirely: this.ws stays pinned to + // newSocket, no reconnect is queued, and onClose is NOT invoked + // (we already told the caller we switched modes). + closeSocket(oldSocket, 1006, 'stale'); + + // No onClose call — the close event came from a superseded socket. + expect(cbs.closeCalls.length).toBe(0); + + // Open the new socket to confirm the helper still holds a valid + // reference to it. If the stale close had nulled out this.ws we'd + // see isOpen() stay false here. + openSocket(newSocket); + expect(conn.isOpen()).toBe(true); + + // Wait long enough that any reconnect timer would have fired. + await new Promise((r) => setTimeout(r, 1100)); + + // Still only the original two sockets — no spurious reconnect. + expect(instances.length).toBe(2); + + conn.close(); + }); + }); + + describe('send', () => { + test('writes to the underlying socket when OPEN', () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { kind: 'self-hosted', baseUrl: 'http://127.0.0.1:7830', token: 't' }, + cbs, + ); + + conn.start(); + openSocket(instances[0]); + conn.send('hello-daemon'); + + expect(instances[0].sent).toEqual(['hello-daemon']); + }); + + test('is a no-op before the socket is OPEN', () => { + const cbs = makeCallbacks(); + const conn = makeConn( + { kind: 'self-hosted', baseUrl: 'http://127.0.0.1:7830', token: 't' }, + cbs, + ); + + conn.start(); + // ws.readyState is still CONNECTING (0). 
+ conn.send('too-early'); + expect(instances[0].sent).toEqual([]); + }); + }); +}); diff --git a/clients/chrome-extension/background/__tests__/self-hosted-auth.test.ts b/clients/chrome-extension/background/__tests__/self-hosted-auth.test.ts new file mode 100644 index 00000000000..019a82b9d82 --- /dev/null +++ b/clients/chrome-extension/background/__tests__/self-hosted-auth.test.ts @@ -0,0 +1,439 @@ +/** + * Tests for the self-hosted capability-token bootstrap state machine. + * + * These tests mock `chrome.runtime.connectNative` and `chrome.storage.local` + * so they can run under bun:test without a real Chrome runtime. The fake + * native-messaging port is a tiny event emitter that lets the test drive + * `onMessage` / `onDisconnect` callbacks by hand. + */ + +import { describe, test, expect, beforeEach, afterEach } from 'bun:test'; + +import { + getStoredLocalToken, + clearLocalToken, + bootstrapLocalToken, + type StoredLocalToken, +} from '../self-hosted-auth.js'; + +const STORAGE_KEY = 'vellum.localCapabilityToken'; + +interface FakeStorage { + data: Record<string, unknown>; + /** + * When set, the next `set()` call rejects with this error (then the + * override is cleared). Used to exercise the storage-failure path. + */ + nextSetError?: Error; + get(key: string | string[]): Promise<Record<string, unknown>>; + set(items: Record<string, unknown>): Promise<void>; + remove(key: string | string[]): Promise<void>; +} + +function createFakeStorage(): FakeStorage { + const data: Record<string, unknown> = {}; + const storage: FakeStorage = { + data, + async get(key) { + const keys = Array.isArray(key) ? key : [key]; + const result: Record<string, unknown> = {}; + for (const k of keys) { + if (k in data) result[k] = data[k]; + } + return result; + }, + async set(items) { + if (storage.nextSetError) { + const err = storage.nextSetError; + storage.nextSetError = undefined; + throw err; + } + Object.assign(data, items); + }, + async remove(key) { + const keys = Array.isArray(key) ? 
key : [key]; + for (const k of keys) delete data[k]; + }, + }; + return storage; +} + +interface FakePort { + name: string; + onMessage: { + addListener(listener: (msg: unknown) => void): void; + removeListener(listener: (msg: unknown) => void): void; + }; + onDisconnect: { + addListener(listener: (port: FakePort) => void): void; + removeListener(listener: (port: FakePort) => void): void; + }; + postMessage(message: unknown): void; + disconnect(): void; + + // Test handles + sent: unknown[]; + disconnected: boolean; + emitMessage(msg: unknown): void; + emitDisconnect(): void; +} + +interface FakeNativeRuntime { + lastError: { message?: string } | undefined; + connectNative(name: string): FakePort; + /** Set by the test to control how newly-created ports behave. */ + onConnect?: (port: FakePort, application: string) => void; + /** The most recently created port, for convenience in tests. */ + currentPort?: FakePort; + /** Log of application names passed to connectNative. */ + connectCalls: string[]; +} + +function createFakePort(): FakePort { + const messageListeners: Array<(msg: unknown) => void> = []; + const disconnectListeners: Array<(port: FakePort) => void> = []; + const port: FakePort = { + name: 'com.vellum.daemon', + onMessage: { + addListener(listener) { + messageListeners.push(listener); + }, + removeListener(listener) { + const idx = messageListeners.indexOf(listener); + if (idx >= 0) messageListeners.splice(idx, 1); + }, + }, + onDisconnect: { + addListener(listener) { + disconnectListeners.push(listener); + }, + removeListener(listener) { + const idx = disconnectListeners.indexOf(listener); + if (idx >= 0) disconnectListeners.splice(idx, 1); + }, + }, + postMessage(message) { + port.sent.push(message); + }, + disconnect() { + port.disconnected = true; + }, + sent: [], + disconnected: false, + emitMessage(msg) { + for (const listener of messageListeners.slice()) listener(msg); + }, + emitDisconnect() { + for (const listener of 
disconnectListeners.slice()) listener(port); + }, + }; + return port; +} + +function createFakeRuntime(): FakeNativeRuntime { + const runtime: FakeNativeRuntime = { + lastError: undefined, + connectCalls: [], + connectNative(application: string) { + runtime.connectCalls.push(application); + const port = createFakePort(); + runtime.currentPort = port; + if (runtime.onConnect) runtime.onConnect(port, application); + return port; + }, + }; + return runtime; +} + +const originalChrome = (globalThis as { chrome?: unknown }).chrome; + +let fakeStorage: FakeStorage; +let fakeRuntime: FakeNativeRuntime; + +beforeEach(() => { + fakeStorage = createFakeStorage(); + fakeRuntime = createFakeRuntime(); + (globalThis as { chrome?: unknown }).chrome = { + storage: { + local: fakeStorage, + }, + runtime: fakeRuntime, + }; +}); + +afterEach(() => { + (globalThis as { chrome?: unknown }).chrome = originalChrome; +}); + +describe('bootstrapLocalToken', () => { + test('happy path persists and returns the token', async () => { + const issuedAt = Date.now(); + const expiresAtIso = new Date(issuedAt + 3_600_000).toISOString(); + + fakeRuntime.onConnect = (port) => { + // Drive the response asynchronously to mirror how Chrome delivers + // native-messaging frames in practice. + queueMicrotask(() => { + port.emitMessage({ + type: 'token_response', + token: 'abc123', + expiresAt: expiresAtIso, + guardianId: 'g-42', + }); + }); + }; + + const result = await bootstrapLocalToken(); + expect(result.token).toBe('abc123'); + expect(result.guardianId).toBe('g-42'); + expect(result.expiresAt).toBe(Date.parse(expiresAtIso)); + + // Port was told to request a token. + expect(fakeRuntime.connectCalls).toEqual(['com.vellum.daemon']); + expect(fakeRuntime.currentPort?.sent).toEqual([{ type: 'request_token' }]); + + // Token was persisted. + expect(fakeStorage.data[STORAGE_KEY]).toEqual(result); + + // Port was cleaned up. 
+ expect(fakeRuntime.currentPort?.disconnected).toBe(true); + }); + + test('accepts numeric expiresAt for forward compatibility', async () => { + const expiresAt = Date.now() + 60_000; + + fakeRuntime.onConnect = (port) => { + queueMicrotask(() => { + port.emitMessage({ + type: 'token_response', + token: 'num-token', + expiresAt, + guardianId: 'g-1', + }); + }); + }; + + const result = await bootstrapLocalToken(); + expect(result.expiresAt).toBe(expiresAt); + }); + + test('malformed token_response rejects and does not persist', async () => { + fakeRuntime.onConnect = (port) => { + queueMicrotask(() => { + port.emitMessage({ + type: 'token_response', + token: 'abc', + // Missing expiresAt + guardianId: 'g-1', + }); + }); + }; + + await expect(bootstrapLocalToken()).rejects.toThrow('malformed token_response'); + expect(fakeStorage.data[STORAGE_KEY]).toBeUndefined(); + expect(fakeRuntime.currentPort?.disconnected).toBe(true); + }); + + test('error frame rejects with the helper message', async () => { + fakeRuntime.onConnect = (port) => { + queueMicrotask(() => { + port.emitMessage({ type: 'error', message: 'unauthorized_origin' }); + }); + }; + + await expect(bootstrapLocalToken()).rejects.toThrow('unauthorized_origin'); + expect(fakeStorage.data[STORAGE_KEY]).toBeUndefined(); + expect(fakeRuntime.currentPort?.disconnected).toBe(true); + }); + + test('error frame without message falls back to a default', async () => { + fakeRuntime.onConnect = (port) => { + queueMicrotask(() => { + port.emitMessage({ type: 'error' }); + }); + }; + + await expect(bootstrapLocalToken()).rejects.toThrow('native messaging error'); + }); + + test('timeout rejects and disconnects the port', async () => { + // onConnect is a no-op — the helper never responds. + fakeRuntime.onConnect = () => { + // Intentionally silent. 
+ }; + + await expect(bootstrapLocalToken({ timeoutMs: 20 })).rejects.toThrow( + 'native messaging timeout', + ); + expect(fakeStorage.data[STORAGE_KEY]).toBeUndefined(); + expect(fakeRuntime.currentPort?.disconnected).toBe(true); + }); + + test('disconnect before response rejects with lastError message', async () => { + fakeRuntime.lastError = { message: 'Specified native messaging host not found.' }; + fakeRuntime.onConnect = (port) => { + queueMicrotask(() => { + port.emitDisconnect(); + }); + }; + + await expect(bootstrapLocalToken()).rejects.toThrow( + 'Specified native messaging host not found.', + ); + expect(fakeStorage.data[STORAGE_KEY]).toBeUndefined(); + }); + + test('disconnect with no lastError falls back to generic message', async () => { + fakeRuntime.onConnect = (port) => { + queueMicrotask(() => { + port.emitDisconnect(); + }); + }; + + await expect(bootstrapLocalToken()).rejects.toThrow( + 'native messaging disconnected before response', + ); + }); + + test('resolves with the in-memory token when chrome.storage.local.set fails', async () => { + const expiresAtIso = new Date(Date.now() + 60_000).toISOString(); + fakeStorage.nextSetError = new Error('QuotaExceededError'); + + fakeRuntime.onConnect = (port) => { + queueMicrotask(() => { + port.emitMessage({ + type: 'token_response', + token: 'persist-fail', + expiresAt: expiresAtIso, + guardianId: 'g-persist', + }); + }); + }; + + // The caller should still receive a usable token even though we + // failed to save it to chrome.storage.local. Persistence is + // best-effort from the pair flow's perspective — the in-memory + // token is still valid for the current session and the popup + // surfaces the same record to the user. + const result = await bootstrapLocalToken(); + expect(result.token).toBe('persist-fail'); + expect(result.guardianId).toBe('g-persist'); + expect(result.expiresAt).toBe(Date.parse(expiresAtIso)); + + // But nothing was actually written to storage. 
+ expect(fakeStorage.data[STORAGE_KEY]).toBeUndefined(); + + // And the port was torn down as part of marking the promise settled. + expect(fakeRuntime.currentPort?.disconnected).toBe(true); + }); + + test('ignores onDisconnect after a valid token_response (race)', async () => { + // Simulates the real-world race where the native helper writes its + // token_response frame and then immediately exits, causing Chrome + // to fire onDisconnect on the same turn as onMessage. Before the + // fix, `settled` was only flipped after the async storage write + // resolved, so a fast disconnect could win the race and reject a + // valid pairing. Now `settled` is set synchronously the moment the + // token frame is validated, so the subsequent disconnect is a no-op. + fakeRuntime.lastError = { message: 'port closed' }; + + const expiresAtIso = new Date(Date.now() + 60_000).toISOString(); + + fakeRuntime.onConnect = (port) => { + queueMicrotask(() => { + port.emitMessage({ + type: 'token_response', + token: 'race-winner', + expiresAt: expiresAtIso, + guardianId: 'g-race', + }); + // Emitted on the same microtask turn as the token frame, before + // the persistLocalToken promise has a chance to resolve. If the + // disconnect handler rejects here, the test fails. + port.emitDisconnect(); + }); + }; + + const result = await bootstrapLocalToken(); + expect(result.token).toBe('race-winner'); + expect(result.guardianId).toBe('g-race'); + // Token was still persisted despite the racing disconnect. 
+ expect(fakeStorage.data[STORAGE_KEY]).toEqual(result); + }); + + test('ignores unknown frame types until a recognised frame arrives', async () => { + const expiresAtIso = new Date(Date.now() + 60_000).toISOString(); + + fakeRuntime.onConnect = (port) => { + queueMicrotask(() => { + port.emitMessage({ type: 'some_future_type', payload: {} }); + port.emitMessage(null); + port.emitMessage('not-an-object'); + port.emitMessage({ + type: 'token_response', + token: 'late', + expiresAt: expiresAtIso, + guardianId: 'g-late', + }); + }); + }; + + const result = await bootstrapLocalToken(); + expect(result.token).toBe('late'); + }); +}); + +describe('getStoredLocalToken', () => { + test('returns null when nothing is stored', async () => { + expect(await getStoredLocalToken()).toBeNull(); + }); + + test('returns the stored token when valid', async () => { + const token: StoredLocalToken = { + token: 'valid', + expiresAt: Date.now() + 60_000, + guardianId: 'g-1', + }; + fakeStorage.data[STORAGE_KEY] = token; + expect(await getStoredLocalToken()).toEqual(token); + }); + + test('returns null when the token is expired', async () => { + fakeStorage.data[STORAGE_KEY] = { + token: 'expired', + expiresAt: Date.now() - 1_000, + guardianId: 'g-1', + } satisfies StoredLocalToken; + expect(await getStoredLocalToken()).toBeNull(); + }); + + test('returns null when the stored value is malformed', async () => { + fakeStorage.data[STORAGE_KEY] = { token: 42, expiresAt: 'soon' }; + expect(await getStoredLocalToken()).toBeNull(); + }); + + test('returns null when guardianId is missing', async () => { + fakeStorage.data[STORAGE_KEY] = { + token: 'valid', + expiresAt: Date.now() + 60_000, + }; + expect(await getStoredLocalToken()).toBeNull(); + }); +}); + +describe('clearLocalToken', () => { + test('removes the key from storage', async () => { + fakeStorage.data[STORAGE_KEY] = { + token: 'to-clear', + expiresAt: Date.now() + 60_000, + guardianId: 'g-1', + } satisfies StoredLocalToken; + await 
clearLocalToken(); + expect(fakeStorage.data[STORAGE_KEY]).toBeUndefined(); + }); + + test('is a no-op when nothing is stored', async () => { + await clearLocalToken(); + expect(fakeStorage.data[STORAGE_KEY]).toBeUndefined(); + }); +}); diff --git a/clients/chrome-extension/background/__tests__/worker-host-browser-result.test.ts b/clients/chrome-extension/background/__tests__/worker-host-browser-result.test.ts new file mode 100644 index 00000000000..02d35215b05 --- /dev/null +++ b/clients/chrome-extension/background/__tests__/worker-host-browser-result.test.ts @@ -0,0 +1,381 @@ +/** + * Tests for the relay-aware host_browser result poster. + * + * Drives `postHostBrowserResult` against a fake `fetch` and a fake + * `RelayConnection` so we can exercise both transport branches without + * standing up a real socket or local daemon. Covers: + * + * - self-hosted mode: POSTs to `${baseUrl}/v1/host-browser-result` + * with `Authorization: Bearer ` and the JSON-serialised + * result envelope as the body. + * - cloud mode with an OPEN connection: sends a JSON-stringified + * `host_browser_result` frame via `connection.send` and never + * touches `fetch`. + * - cloud mode with a closed or null connection: logs a warning, + * never touches `fetch`, and never throws. + * + * The function lives in `relay-connection.ts` (rather than `worker.ts`) + * so the test can import it directly without dragging in the chrome + * service worker module surface. + * + * Related: worker.ts's `connect()` re-reads `vellum.relayMode` from + * chrome.storage.local at entry to close a race where the popup toggles + * the mode radio and immediately clicks Connect before the async + * `chrome.storage.onChanged` listener updates the module-level + * `relayMode` variable. 
That live-read cannot be unit-tested here + * without dragging in the entire service worker module surface + * (chrome.* globals, bootstrap(), native messaging, etc.), but the + * behaviour is verifiable by reading `connect()` in worker.ts. + */ + +import { describe, test, expect, beforeEach, afterEach } from 'bun:test'; + +import { + postHostBrowserResult, + type RelayConnectionLike, + type RelayMode, +} from '../relay-connection.js'; +import type { HostBrowserResultEnvelope } from '../host-browser-dispatcher.js'; + +// ── Fake transports ───────────────────────────────────────────────── + +interface FakeFetchCall { + input: string; + init?: RequestInit; +} + +interface FakeFetchHandle { + calls: FakeFetchCall[]; + /** Sets the response returned by the next fetch call. */ + setNextResponse(resp: Response): void; + restore(): void; +} + +function installFakeFetch(): FakeFetchHandle { + const calls: FakeFetchCall[] = []; + let nextResponse: Response = new Response(null, { status: 200 }); + const original = (globalThis as { fetch?: typeof fetch }).fetch; + (globalThis as { fetch: typeof fetch }).fetch = (async ( + input: RequestInfo | URL, + init?: RequestInit, + ) => { + calls.push({ input: String(input), init }); + return nextResponse; + }) as typeof fetch; + return { + calls, + setNextResponse(resp) { + nextResponse = resp; + }, + restore() { + if (original) { + (globalThis as { fetch: typeof fetch }).fetch = original; + } else { + delete (globalThis as { fetch?: typeof fetch }).fetch; + } + }, + }; +} + +interface FakeConnection extends RelayConnectionLike { + sent: string[]; + /** Toggle whether `isOpen()` returns true or false. */ + open: boolean; + /** + * Mutable mode reference. Tests can reassign this to simulate a + * token refresh after a reconnect-with-refresh cycle and then + * verify that subsequent `getCurrentMode()` reads pick up the new + * value (i.e. the caller is NOT caching a snapshot). 
+ */ + mode: RelayMode; +} + +function makeFakeConnection(open: boolean, mode?: RelayMode): FakeConnection { + const sent: string[] = []; + const defaultMode: RelayMode = { + kind: 'self-hosted', + baseUrl: 'http://127.0.0.1:9999', + token: 'tok-initial', + }; + return { + sent, + open, + mode: mode ?? defaultMode, + isOpen() { + return this.open; + }, + send(data) { + sent.push(data); + }, + getCurrentMode() { + return this.mode; + }, + }; +} + +interface ConsoleSpy { + warnings: unknown[][]; + restore(): void; +} + +function spyConsoleWarn(): ConsoleSpy { + const warnings: unknown[][] = []; + const original = console.warn; + console.warn = (...args: unknown[]) => { + warnings.push(args); + }; + return { + warnings, + restore() { + console.warn = original; + }, + }; +} + +// ── Fixtures ──────────────────────────────────────────────────────── + +const exampleResult: HostBrowserResultEnvelope = { + requestId: 'req-abc', + content: '{"frameId":"42"}', + isError: false, +}; + +let fetchHandle: FakeFetchHandle; +let consoleSpy: ConsoleSpy; + +beforeEach(() => { + fetchHandle = installFakeFetch(); + consoleSpy = spyConsoleWarn(); +}); + +afterEach(() => { + fetchHandle.restore(); + consoleSpy.restore(); +}); + +// ── Self-hosted mode ──────────────────────────────────────────────── + +describe('postHostBrowserResult — self-hosted mode', () => { + test('POSTs to ${baseUrl}/v1/host-browser-result with bearer auth', async () => { + const mode: RelayMode = { + kind: 'self-hosted', + baseUrl: 'http://127.0.0.1:9999', + token: 'tok-1', + }; + + await postHostBrowserResult(mode, null, exampleResult); + + expect(fetchHandle.calls.length).toBe(1); + const call = fetchHandle.calls[0]; + expect(call.input).toBe('http://127.0.0.1:9999/v1/host-browser-result'); + expect(call.init?.method).toBe('POST'); + const headers = call.init?.headers as Record | undefined; + expect(headers?.authorization).toBe('Bearer tok-1'); + expect(headers?.['content-type']).toBe('application/json'); + 
expect(call.init?.body).toBe(JSON.stringify(exampleResult)); + }); + + test('omits the authorization header when no token is configured', async () => { + const mode: RelayMode = { + kind: 'self-hosted', + baseUrl: 'http://127.0.0.1:9999', + token: null, + }; + + await postHostBrowserResult(mode, null, exampleResult); + + expect(fetchHandle.calls.length).toBe(1); + const headers = fetchHandle.calls[0].init?.headers as + | Record + | undefined; + expect(headers?.authorization).toBeUndefined(); + }); + + test('strips a trailing slash from the base URL', async () => { + const mode: RelayMode = { + kind: 'self-hosted', + baseUrl: 'http://127.0.0.1:9999/', + token: 'tok-1', + }; + + await postHostBrowserResult(mode, null, exampleResult); + + expect(fetchHandle.calls[0].input).toBe( + 'http://127.0.0.1:9999/v1/host-browser-result', + ); + }); + + test('logs a warning when the daemon returns a non-2xx status', async () => { + fetchHandle.setNextResponse(new Response(null, { status: 503 })); + const mode: RelayMode = { + kind: 'self-hosted', + baseUrl: 'http://127.0.0.1:9999', + token: 'tok-1', + }; + + await postHostBrowserResult(mode, null, exampleResult); + + expect(consoleSpy.warnings.length).toBeGreaterThanOrEqual(1); + const flat = consoleSpy.warnings.flat().join(' '); + expect(flat).toContain('503'); + }); + + test('ignores the supplied connection in self-hosted mode', async () => { + const conn = makeFakeConnection(true); + const mode: RelayMode = { + kind: 'self-hosted', + baseUrl: 'http://127.0.0.1:9999', + token: 'tok-1', + }; + + await postHostBrowserResult(mode, conn, exampleResult); + + expect(fetchHandle.calls.length).toBe(1); + expect(conn.sent).toEqual([]); + }); +}); + +// ── Cloud mode ────────────────────────────────────────────────────── + +describe('postHostBrowserResult — cloud mode', () => { + test('sends a host_browser_result frame over an open connection and skips fetch', async () => { + const conn = makeFakeConnection(true); + const mode: 
RelayMode = { + kind: 'cloud', + baseUrl: 'https://api.vellum.ai', + token: 'cloud-token', + }; + + await postHostBrowserResult(mode, conn, exampleResult); + + expect(fetchHandle.calls).toEqual([]); + expect(conn.sent.length).toBe(1); + const parsed = JSON.parse(conn.sent[0]) as Record; + expect(parsed.type).toBe('host_browser_result'); + expect(parsed.requestId).toBe(exampleResult.requestId); + expect(parsed.content).toBe(exampleResult.content); + expect(parsed.isError).toBe(exampleResult.isError); + }); + + test('warns and no-ops when the connection is not open', async () => { + const conn = makeFakeConnection(false); + const mode: RelayMode = { + kind: 'cloud', + baseUrl: 'https://api.vellum.ai', + token: 'cloud-token', + }; + + const returned = await postHostBrowserResult(mode, conn, exampleResult); + expect(returned).toBeUndefined(); + + expect(fetchHandle.calls).toEqual([]); + expect(conn.sent).toEqual([]); + const flat = consoleSpy.warnings.flat().join(' '); + expect(flat).toContain('cloud relay not connected'); + }); + + test('warns and no-ops when the connection is null', async () => { + const mode: RelayMode = { + kind: 'cloud', + baseUrl: 'https://api.vellum.ai', + token: 'cloud-token', + }; + + const returned = await postHostBrowserResult(mode, null, exampleResult); + expect(returned).toBeUndefined(); + + expect(fetchHandle.calls).toEqual([]); + const flat = consoleSpy.warnings.flat().join(' '); + expect(flat).toContain('cloud relay not connected'); + }); +}); + +// ── Live mode read (stale-token regression) ───────────────────────── +// +// Pins the call-site contract that worker.ts's +// `dispatchHostBrowserResult` MUST pull the mode out of the live +// RelayConnection via `getCurrentMode()` on every dispatch, rather +// than closing over a snapshot captured at `connect()` time. 
+// +// The regression being pinned: when `scheduleReconnectWithRefresh` +// fires after a WebSocket drop, the connection's internal `deps.mode` +// is replaced with a new object holding a freshly minted token. A +// caller that cached the old mode object would silently 401/403 +// forever. By reading through `getCurrentMode()` on every POST, the +// new token propagates automatically. +// +// We model the exact call-site pattern used by worker.ts — +// `const mode = conn.getCurrentMode(); return postHostBrowserResult(mode, conn, result);` +// — so that this test fails the moment someone re-introduces a +// snapshot capture. + +async function dispatchViaConnection( + conn: RelayConnectionLike, + result: HostBrowserResultEnvelope, +): Promise { + const mode = conn.getCurrentMode(); + return postHostBrowserResult(mode, conn, result); +} + +describe('postHostBrowserResult — live mode read via getCurrentMode()', () => { + test('self-hosted: second dispatch picks up a refreshed token from the connection', async () => { + const conn = makeFakeConnection(true, { + kind: 'self-hosted', + baseUrl: 'http://127.0.0.1:9999', + token: 'tok-old', + }); + + await dispatchViaConnection(conn, exampleResult); + + // Simulate a reconnect-with-refresh cycle: the RelayConnection + // would replace `deps.mode` with a new object holding the fresh + // token. We mutate the fake's `mode` field to mimic that swap. 
+ conn.mode = { + kind: 'self-hosted', + baseUrl: 'http://127.0.0.1:9999', + token: 'tok-new', + }; + + await dispatchViaConnection(conn, exampleResult); + + expect(fetchHandle.calls.length).toBe(2); + const firstHeaders = fetchHandle.calls[0].init?.headers as + | Record + | undefined; + const secondHeaders = fetchHandle.calls[1].init?.headers as + | Record + | undefined; + expect(firstHeaders?.authorization).toBe('Bearer tok-old'); + expect(secondHeaders?.authorization).toBe('Bearer tok-new'); + }); + + test('self-hosted: mode swap to cloud on the connection routes subsequent dispatches over the WebSocket', async () => { + // Extra belt-and-braces: if the mode KIND itself flips (e.g. a + // mode switch via setMode), the call-site must still read it + // live. A captured snapshot would POST to the now-wrong baseUrl. + const conn = makeFakeConnection(true, { + kind: 'self-hosted', + baseUrl: 'http://127.0.0.1:9999', + token: 'tok-old', + }); + + await dispatchViaConnection(conn, exampleResult); + expect(fetchHandle.calls.length).toBe(1); + expect(conn.sent.length).toBe(0); + + conn.mode = { + kind: 'cloud', + baseUrl: 'https://api.vellum.ai', + token: 'cloud-token', + }; + + await dispatchViaConnection(conn, exampleResult); + + // Still exactly one fetch — the cloud dispatch must not have + // fallen through to an HTTP POST. + expect(fetchHandle.calls.length).toBe(1); + expect(conn.sent.length).toBe(1); + const parsed = JSON.parse(conn.sent[0]) as Record; + expect(parsed.type).toBe('host_browser_result'); + }); +}); diff --git a/clients/chrome-extension/background/cdp-proxy.ts b/clients/chrome-extension/background/cdp-proxy.ts new file mode 100644 index 00000000000..def988f6ea6 --- /dev/null +++ b/clients/chrome-extension/background/cdp-proxy.ts @@ -0,0 +1,302 @@ +/** + * Standalone CDP JSON-RPC proxy that wraps `chrome.debugger`. + * + * Provides a typed attach/detach/send/onEvent surface over the chrome.debugger + * API. 
The module is decoupled from the service worker lifecycle and from any + * relay transport so it can be consumed by a host-browser dispatcher and + * exercised in isolation. The `ChromeDebuggerApi` interface is injectable so + * tests can drive both success and error paths against a mock. + * + * Flat-session handling + * --------------------- + * Chrome 125+ supports flat sessions created via `Target.attachToTarget` with + * `flatten: true`. For flat sessions the child `sessionId` is addressed via + * the `target` (DebuggerSession) argument to `chrome.debugger.sendCommand` — + * NOT by smuggling the value into the command params object. This proxy + * mirrors that contract: + * + * - `send()`: when `frame.sessionId` is provided, the value is attached to + * the `DebuggerSession` target passed to `api.sendCommand`. Command params + * are forwarded as-is. + * + * - `onEvent()`: the `source: DebuggerSession` argument supplied by Chrome + * identifies the originating session. The proxy reads `source.sessionId` + * and hoists it onto the emitted `CdpEventFrame.sessionId` so consumers + * can route events without having to inspect the source object. + * + * Errors from the underlying chrome.debugger callbacks are read through + * `api.runtime.lastError` (rather than the global `chrome.runtime.lastError`) + * so that tests passing a mocked `ChromeDebuggerApi` can simulate failures + * by toggling `runtime.lastError` on the mock. + */ + +/** Raw CDP frame as received from the runtime over the relay. */ +export interface CdpRequestFrame { + id: number; + method: string; + params?: Record; + /** + * Optional CDP session id for nested flat sessions. Routed via the + * `DebuggerSession` target argument of `chrome.debugger.sendCommand` (see + * the module docstring for the flat-session contract). + */ + sessionId?: string; +} + +/** Raw CDP result frame that the extension sends back. 
*/ +export interface CdpResultFrame { + id: number; + result?: unknown; + error?: { code: number; message: string; data?: unknown }; +} + +/** CDP event frame forwarded from chrome.debugger.onEvent. */ +export interface CdpEventFrame { + method: string; + params?: unknown; + sessionId?: string; +} + +/** + * Target identifier passed to chrome.debugger.attach. Source-compatible with + * `chrome.debugger.Debuggee` so the two values can be interchanged at the + * call site without a structural cast. + */ +export interface CdpDebuggee { + tabId?: number; + extensionId?: string; + targetId?: string; +} + +/** + * `DebuggerSession` is the Chrome 125+ shape that `chrome.debugger.sendCommand` + * (and the `onEvent` `source` argument) accept: a `Debuggee` plus an optional + * `sessionId` that addresses a child flat session created via + * `Target.attachToTarget` with `flatten: true`. + */ +export interface DebuggerSession extends CdpDebuggee { + sessionId?: string; +} + +export interface CdpTarget { + tabId?: number; + targetId?: string; +} + +export interface CdpProxy { + attach(target: CdpTarget, requiredVersion: string): Promise; + detach(target: CdpTarget): Promise; + send(target: CdpTarget, frame: CdpRequestFrame): Promise; + onEvent(handler: (event: CdpEventFrame) => void): () => void; // returns unsubscribe + onDetach(handler: (target: CdpDebuggee, reason: string) => void): () => void; // returns unsubscribe + dispose(): void; +} + +/** + * Inject the chrome.debugger API (plus the slice of `chrome.runtime` we read + * `lastError` from) so tests can pass a mock. The shape is intentionally + * source-compatible with the real `chrome.debugger` namespace plus a + * `runtime.lastError` field — at runtime we satisfy this by composing + * `chrome.debugger` and `chrome.runtime` (see the default in `createCdpProxy`). 
+ * + * `attach` / `detach` / `sendCommand` / `onEvent` all accept a + * `DebuggerSession` so that flat-session child commands and events can be + * routed via the target's `sessionId` field, matching Chrome 125+ semantics. + * + * Reading `lastError` through the injected `api.runtime.lastError` (rather + * than the global `chrome.runtime.lastError`) is what makes the proxy + * properly testable: a mocked `ChromeDebuggerApi` can simulate failure paths + * by toggling `runtime.lastError` on the mock between callback invocations. + */ +export interface ChromeDebuggerApi { + attach(target: DebuggerSession, requiredVersion: string, callback?: () => void): void; + detach(target: DebuggerSession, callback?: () => void): void; + sendCommand( + target: DebuggerSession, + method: string, + params?: Record, + callback?: (result?: unknown) => void, + ): void; + onEvent: { + addListener( + callback: ( + source: DebuggerSession, + method: string, + params?: unknown, + ) => void, + ): void; + removeListener( + callback: ( + source: DebuggerSession, + method: string, + params?: unknown, + ) => void, + ): void; + }; + onDetach: { + addListener(callback: (source: CdpDebuggee, reason: string) => void): void; + removeListener(callback: (source: CdpDebuggee, reason: string) => void): void; + }; + /** + * Mirror of `chrome.runtime.lastError`. The chrome.debugger callbacks + * report errors by setting `chrome.runtime.lastError` synchronously inside + * the callback. We thread the `runtime` reference through the injectable + * api so that mocked tests do not need to set anything on a global `chrome`. + */ + runtime: { + lastError?: { message?: string }; + }; +} + +/** + * Compose a default `ChromeDebuggerApi` from the real `chrome` global. We + * build a plain object with `chrome.debugger`'s methods explicitly bound to + * `chrome.debugger`, plus a live `runtime` getter that reads `chrome.runtime` + * on every access. 
/*
 * Methods MUST be bound: Chrome's native bindings check `this` to be the
 * original `chrome.debugger` object and will throw "Illegal invocation"
 * otherwise. The `onEvent` / `onDetach` event objects are forwarded as-is —
 * `chrome.events.Event` instances expose `addListener` / `removeListener`
 * that don't depend on the surrounding receiver. The `runtime` getter (rather
 * than a snapshot) is required because `chrome.runtime.lastError` is set by
 * the browser synchronously during callback invocation.
 */
function defaultChromeDebuggerApi(): ChromeDebuggerApi {
  const d = chrome.debugger;
  return {
    attach: d.attach.bind(d),
    detach: d.detach.bind(d),
    sendCommand: d.sendCommand.bind(d),
    onEvent: d.onEvent,
    onDetach: d.onDetach,
    get runtime() {
      return chrome.runtime;
    },
  };
}

/**
 * Build a {@link CdpProxy} on top of the injected chrome.debugger surface.
 *
 * Exactly one `onEvent` and one `onDetach` listener are registered on `api`
 * at construction time; subscribers added through the returned proxy are
 * fanned out from those two listeners, and `dispose()` removes both.
 *
 * @param api - chrome.debugger-compatible surface. Defaults to the real
 *   `chrome` global; tests pass a mock and toggle `api.runtime.lastError`.
 * @returns A proxy whose `attach` / `detach` reject on `lastError`, and whose
 *   `send` always resolves with a result-or-error frame.
 */
export function createCdpProxy(api: ChromeDebuggerApi = defaultChromeDebuggerApi()): CdpProxy {
  const eventHandlers = new Set<(event: CdpEventFrame) => void>();
  const detachHandlers = new Set<(target: CdpDebuggee, reason: string) => void>();

  const onEventListener = (
    source: DebuggerSession,
    method: string,
    params?: unknown,
  ) => {
    // For flat sessions Chrome 125+ surfaces the originating session id on
    // the `source` DebuggerSession (NOT inside `params`). Hoist it onto the
    // emitted CdpEventFrame so downstream consumers can route events without
    // having to inspect the source object.
    const event: CdpEventFrame = { method, params, sessionId: source.sessionId };
    for (const h of eventHandlers) {
      // One misbehaving subscriber must not starve the others.
      try {
        h(event);
      } catch (err) {
        console.error("[cdp-proxy] event handler threw", err);
      }
    }
  };
  api.onEvent.addListener(onEventListener);

  const onDetachListener = (source: CdpDebuggee, reason: string) => {
    for (const h of detachHandlers) {
      try {
        h(source, reason);
      } catch (err) {
        console.error("[cdp-proxy] detach handler threw", err);
      }
    }
  };
  api.onDetach.addListener(onDetachListener);

  /** Map a CdpTarget onto a Debuggee; `targetId` wins over `tabId`. */
  function targetToDebuggee(target: CdpTarget): CdpDebuggee {
    if (target.targetId) return { targetId: target.targetId };
    if (target.tabId !== undefined) return { tabId: target.tabId };
    throw new Error("CdpTarget must have either tabId or targetId");
  }

  return {
    attach(target, requiredVersion) {
      return new Promise<void>((resolve, reject) => {
        api.attach(targetToDebuggee(target), requiredVersion, () => {
          const err = api.runtime.lastError;
          if (err) reject(new Error(err.message ?? "chrome.debugger.attach failed"));
          else resolve();
        });
      });
    },
    detach(target) {
      return new Promise<void>((resolve, reject) => {
        api.detach(targetToDebuggee(target), () => {
          const err = api.runtime.lastError;
          if (err) reject(new Error(err.message ?? "chrome.debugger.detach failed"));
          else resolve();
        });
      });
    },
    /**
     * Dispatch a CDP command. For flat sessions (created via
     * `Target.attachToTarget` with `flatten: true`) Chrome 125+ routes the
     * child `sessionId` via the `target` (DebuggerSession) argument — not
     * via the command params object. When `frame.sessionId` is provided we
     * attach it to the DebuggerSession passed to `api.sendCommand`; params
     * are forwarded as-is.
     *
     * Contract: the returned promise ALWAYS resolves with a CdpResultFrame
     * (success OR error) and never rejects:
     *   - invalid target (neither `tabId` nor `targetId`) -> code -32602
     *   - `runtime.lastError` set inside the callback      -> code -32000
     *   - `api.sendCommand` throwing synchronously         -> code -32000
     */
    send(target, frame) {
      return new Promise<CdpResultFrame>((resolve) => {
        // `targetToDebuggee` throws synchronously when the target has neither
        // `tabId` nor `targetId`; convert that into a CDP -32602 ("invalid
        // params") error frame so the always-resolve contract holds.
        let debuggerSession: DebuggerSession;
        try {
          debuggerSession = frame.sessionId
            ? { ...targetToDebuggee(target), sessionId: frame.sessionId }
            : targetToDebuggee(target);
        } catch (err) {
          resolve({
            id: frame.id,
            error: {
              code: -32602,
              message: err instanceof Error ? err.message : String(err),
            },
          });
          return;
        }
        // The extension bindings can also throw synchronously from
        // `sendCommand` itself (e.g. argument/schema validation) before the
        // callback is ever scheduled. Without this guard that throw would
        // surface as a rejection and break the same contract.
        try {
          api.sendCommand(debuggerSession, frame.method, frame.params, (result) => {
            const err = api.runtime.lastError;
            if (err) {
              resolve({
                id: frame.id,
                error: { code: -32000, message: err.message ?? "chrome.debugger.sendCommand failed" },
              });
            } else {
              resolve({ id: frame.id, result });
            }
          });
        } catch (err) {
          resolve({
            id: frame.id,
            error: {
              code: -32000,
              message: err instanceof Error ? err.message : String(err),
            },
          });
        }
      });
    },
    onEvent(handler) {
      eventHandlers.add(handler);
      return () => {
        eventHandlers.delete(handler);
      };
    },
    onDetach(handler) {
      detachHandlers.add(handler);
      return () => {
        detachHandlers.delete(handler);
      };
    },
    dispose() {
      eventHandlers.clear();
      detachHandlers.clear();
      api.onEvent.removeListener(onEventListener);
      api.onDetach.removeListener(onDetachListener);
    },
  };
}

// diff --git a/clients/chrome-extension/background/cloud-auth.ts b/clients/chrome-extension/background/cloud-auth.ts
// new file mode 100644
// index 00000000000..b63d5690d21
// --- /dev/null
// +++ b/clients/chrome-extension/background/cloud-auth.ts
// @@ -0,0 +1,78 @@
/**
 * Cloud OAuth sign-in state machine for the Vellum chrome extension.
 *
 * Launches chrome.identity.launchWebAuthFlow against the Vellum gateway and
 * persists the guardian-bound JWT in chrome.storage.local.
 */
/*
 * The token is used by later PRs to authenticate the browser-relay WebSocket
 * against the cloud gateway — this module is the storage + state machine
 * layer only.
 */

export interface CloudAuthConfig {
  /** Gateway base URL, e.g. https://api.vellum.ai */
  gatewayBaseUrl: string;
  /** OAuth client id registered for the chrome extension. */
  clientId: string;
}

export interface StoredCloudToken {
  token: string;
  expiresAt: number; // ms since epoch
  guardianId: string;
}

const STORAGE_KEY = 'vellum.cloudAuthToken';

/**
 * Read the persisted cloud token from `chrome.storage.local`.
 *
 * @returns The stored token, or `null` when nothing is stored, the stored
 *   value does not have the `StoredCloudToken` shape, or the token has
 *   expired (`expiresAt <= Date.now()`).
 */
export async function getStoredToken(): Promise<StoredCloudToken | null> {
  const result = await chrome.storage.local.get(STORAGE_KEY);
  const raw: unknown = result[STORAGE_KEY];
  if (!raw || typeof raw !== 'object') return null;
  const token = raw as StoredCloudToken;
  if (
    typeof token.token !== 'string' ||
    // `Number.isFinite` is false for every non-number AND for NaN/±Infinity.
    // A bare `typeof === 'number'` check would let NaN through, and because
    // `NaN <= Date.now()` is false such a token would never be seen as expired.
    !Number.isFinite(token.expiresAt) ||
    typeof token.guardianId !== 'string'
  ) {
    return null;
  }
  if (token.expiresAt <= Date.now()) return null;
  return token;
}

/** Remove the persisted cloud token; a no-op when nothing is stored. */
export async function clearStoredToken(): Promise<void> {
  await chrome.storage.local.remove(STORAGE_KEY);
}

/** Write `token` to `chrome.storage.local` under the module's storage key. */
async function persistToken(token: StoredCloudToken): Promise<void> {
  await chrome.storage.local.set({ [STORAGE_KEY]: token });
}

/**
 * Launches chrome.identity.launchWebAuthFlow to obtain a guardian-bound JWT.
 * The extension receives the token via the redirect URI fragment
 * (`#token=…&expires_in=…&guardian_id=…`), and the parsed token is persisted
 * before being returned.
 *
 * @param config - Gateway base URL (a single trailing slash is stripped) and
 *   the OAuth client id.
 * @returns The persisted token, with `expiresAt` computed as
 *   `Date.now() + expires_in * 1000`.
 * @throws Error('cloud sign-in cancelled') when the flow yields no redirect URL.
 * @throws Error('cloud sign-in returned incomplete payload') when `token` or
 *   `guardian_id` is missing, or `expires_in` is not a positive number.
 */
export async function signInCloud(config: CloudAuthConfig): Promise<StoredCloudToken> {
  const redirectUri = chrome.identity.getRedirectURL('cloud-auth');
  const authUrl =
    `${config.gatewayBaseUrl.replace(/\/$/, '')}/oauth/chrome-extension/start` +
    `?client_id=${encodeURIComponent(config.clientId)}` +
    `&redirect_uri=${encodeURIComponent(redirectUri)}`;

  const responseUrl = await chrome.identity.launchWebAuthFlow({ url: authUrl, interactive: true });
  if (!responseUrl) throw new Error('cloud sign-in cancelled');

  const hash = new URL(responseUrl).hash.replace(/^#/, '');
  const params = new URLSearchParams(hash);
  const token = params.get('token');
  const expiresIn = parseInt(params.get('expires_in') ?? '0', 10);
  const guardianId = params.get('guardian_id') ?? '';
  // `expires_in` must be strictly positive: a zero, negative or unparsable
  // lifetime would persist a token that is already expired on arrival.
  const hasValidLifetime = Number.isFinite(expiresIn) && expiresIn > 0;
  if (!token || !hasValidLifetime || !guardianId) {
    throw new Error('cloud sign-in returned incomplete payload');
  }
  const stored: StoredCloudToken = {
    token,
    expiresAt: Date.now() + expiresIn * 1000,
    guardianId,
  };
  await persistToken(stored);
  return stored;
}

// diff --git a/clients/chrome-extension/background/host-browser-dispatcher.ts b/clients/chrome-extension/background/host-browser-dispatcher.ts
// new file mode 100644
// index 00000000000..7b3e72f15e8
// --- /dev/null
// +++ b/clients/chrome-extension/background/host-browser-dispatcher.ts
// @@ -0,0 +1,242 @@
/**
 * host_browser envelope dispatcher.
 *
 * Consumes `host_browser_request` / `host_browser_cancel` envelopes received
 * over the existing browser-relay WebSocket, drives a CdpProxy to execute the
 * CDP command against a resolved debuggee target, and POSTs a result envelope
 * back to the daemon's `/v1/host-browser-result` HTTP endpoint.
 *
 * This module is deliberately transport-agnostic: the `worker.ts` service
 * worker is responsible for pulling envelopes off the WebSocket and calling
 * `handle()` / `cancel()`, and for providing the `resolveTarget` + `postResult`
 * dependency closures.
 */
That keeps the dispatcher easy to unit-test in + * isolation against a mock CdpProxy. + * + * Phase 2 / PR 9: this dispatcher is only wired up when the + * `vellum.cdpProxyEnabled` feature flag is set in `chrome.storage.local`. + * With the flag off, the legacy `ExtensionCommand` handlers in worker.ts + * continue to service browser tools exactly as before. + */ + +import { + createCdpProxy, + type CdpDebuggee, + type CdpProxy, + type CdpTarget, +} from './cdp-proxy.js'; + +/** + * host_browser_request envelope as received over the existing browser-relay + * WebSocket. Field names are camelCase to match the daemon's ServerMessage + * discriminator wire format — see + * `assistant/src/daemon/message-types/host-browser.ts` for the canonical + * types. Note `timeout_seconds` is the one snake_case field the daemon emits + * (a holdover from Phase 1) and we preserve it as-is. + */ +export interface HostBrowserRequestEnvelope { + type: 'host_browser_request'; + requestId: string; + conversationId: string; + cdpMethod: string; + cdpParams?: Record; + cdpSessionId?: string; + timeout_seconds?: number; +} + +/** host_browser_cancel envelope sent when the daemon side aborts a request. */ +export interface HostBrowserCancelEnvelope { + type: 'host_browser_cancel'; + requestId: string; +} + +/** + * Result envelope POSTed back to the runtime's /v1/host-browser-result + * endpoint. Shape mirrors the runtime Zod schema in + * `assistant/src/runtime/routes/host-browser-routes.ts` (`requestId`, + * `content`, `isError`): `content` is the stringified CDP result (or error), + * and `isError` is true if the CDP command reported a JSON-RPC error + * envelope or if the dispatcher itself threw before it could reach the + * result frame. + */ +export interface HostBrowserResultEnvelope { + requestId: string; + content: string; + isError: boolean; +} + +export interface HostBrowserDispatcherDeps { + /** + * Target resolver. 
When `cdpSessionId` is provided it is treated as an + * opaque `targetId` (matching how the CdpProxy addresses flat sessions via + * the DebuggerSession target field). Otherwise the resolver should fall + * back to "most recently active tab". + */ + resolveTarget( + cdpSessionId: string | undefined, + ): Promise<{ tabId?: number; targetId?: string }>; + /** POST result envelope back to /v1/host-browser-result. */ + postResult(result: HostBrowserResultEnvelope): Promise; + /** Optional injected CdpProxy for tests. Defaults to a real proxy at runtime. */ + cdpProxy?: CdpProxy; +} + +export interface HostBrowserDispatcher { + handle(envelope: HostBrowserRequestEnvelope): Promise; + cancel(envelope: HostBrowserCancelEnvelope): void; + dispose(): void; +} + +/** + * Stable string key for an attach-tracking set. A CdpTarget is either a + * numeric `tabId` or an opaque `targetId` string — we serialize whichever + * is set into a prefix-disambiguated key so tabId=123 and targetId="123" + * can't collide. + */ +function targetKey(target: CdpTarget): string { + if (target.targetId) return `targetId:${target.targetId}`; + if (target.tabId !== undefined) return `tabId:${target.tabId}`; + throw new Error('CdpTarget must have either tabId or targetId'); +} + +/** + * Build the same target-key from a `CdpDebuggee` payload as `targetKey` + * does for a `CdpTarget`. The CDP proxy's `onDetach` callback receives a + * `CdpDebuggee` (the chrome.debugger Debuggee shape), so we need a helper + * that produces an identical key from that variant — otherwise the cache + * deletion on detach would silently miss and the stale entry would persist. + * + * Returns `null` when the debuggee shape carries neither a `tabId` nor a + * `targetId` (e.g. extensionId-only attaches, which the dispatcher does + * not currently use). Callers treat null as "nothing to invalidate". 
+ */ +function debuggeeKey(debuggee: CdpDebuggee): string | null { + if (debuggee.targetId) return `targetId:${debuggee.targetId}`; + if (debuggee.tabId !== undefined) return `tabId:${debuggee.tabId}`; + return null; +} + +export function createHostBrowserDispatcher( + deps: HostBrowserDispatcherDeps, +): HostBrowserDispatcher { + const proxy = deps.cdpProxy ?? createCdpProxy(); + const inFlight = new Map(); + // Track which targets we've already attached to so repeat commands + // against the same tab/session don't unnecessarily call attach again. + // Chrome treats a second attach as a hard failure ("Another debugger is + // already attached..."), so either we dedupe here or we catch the error. + // Deduping is cheaper and keeps the happy path clean. + const attachedTargets = new Set(); + let nextCdpId = 1; + + // Invalidate the attached-targets cache whenever Chrome notifies us that + // it has detached the debugger from a target. This covers tab close, + // navigation across security origins, the user clicking "Cancel" on the + // chrome.debugger infobar, and another debugger taking over via + // Target.attachToTarget. Without this subscription the cache would hold + // a stale entry forever and subsequent commands against the same target + // would skip the re-attach and hit a permanent CDP failure. 
+ const unsubscribeOnDetach = proxy.onDetach((debuggee) => { + const key = debuggeeKey(debuggee); + if (key !== null) attachedTargets.delete(key); + }); + + async function handle(envelope: HostBrowserRequestEnvelope): Promise { + const abort = new AbortController(); + inFlight.set(envelope.requestId, abort); + try { + const target = await deps.resolveTarget(envelope.cdpSessionId); + const key = targetKey(target); + if (!attachedTargets.has(key)) { + try { + await proxy.attach(target, '1.3'); + attachedTargets.add(key); + } catch (attachErr) { + // Tolerate the "already attached" race: Chrome surfaces this as + // "Another debugger is already attached to the tab with id: N." + // when a concurrent sibling request or an earlier invocation that + // predates this dispatcher instance already owns the debuggee. + // Treat it as success and record the target as attached. The + // match is case-insensitive because Chrome's wording has shifted + // across versions and across extensionId/tabId/targetId variants. + const msg = ( + attachErr instanceof Error ? attachErr.message : String(attachErr) + ).toLowerCase(); + if (msg.includes('already attached')) { + attachedTargets.add(key); + } else { + throw attachErr; + } + } + } + const frame = await proxy.send(target, { + id: nextCdpId++, + method: envelope.cdpMethod, + params: envelope.cdpParams, + sessionId: envelope.cdpSessionId, + }); + // Recovery hint: if the CDP send returned an error indicating the + // target is no longer attached (tab closed mid-flight, navigated + // across origins, another debugger took over, etc.), evict the + // cache entry so the *next* request triggers a fresh attach. The + // current request still fails — eviction does not retry, it only + // unblocks subsequent traffic that would otherwise hit the same + // stale-cache failure forever. 
+ // + // Error matching is intentionally string-based: chrome.debugger + // surfaces these failures via `chrome.runtime.lastError.message` + // and the wording varies across Chrome versions. cdp-proxy maps + // those into `{ code: -32000, message }` JSON-RPC error frames. + if (frame.error) { + const errMsg = (frame.error.message ?? '').toLowerCase(); + if ( + errMsg.includes('not attached') || + errMsg.includes('detached') || + errMsg.includes('target closed') || + errMsg.includes('no target with given id') + ) { + attachedTargets.delete(key); + } + } + await deps.postResult({ + requestId: envelope.requestId, + content: JSON.stringify(frame.error ?? frame.result ?? {}), + isError: frame.error != null, + }); + } catch (err) { + // Guard the failure-path postResult in its own try/catch: if the HTTP + // POST itself fails (e.g. the relay socket is torn down while we're + // in the error path) we must NOT let that secondary rejection escape + // to the Chrome service worker as an unhandled promise rejection. + try { + await deps.postResult({ + requestId: envelope.requestId, + content: err instanceof Error ? 
err.message : String(err), + isError: true, + }); + } catch (postErr) { + console.error( + '[host-browser-dispatcher] Failed to post error result for', + envelope.requestId, + postErr, + ); + } + } finally { + inFlight.delete(envelope.requestId); + } + } + + function cancel(envelope: HostBrowserCancelEnvelope): void { + inFlight.get(envelope.requestId)?.abort(); + inFlight.delete(envelope.requestId); + } + + function dispose(): void { + for (const abort of inFlight.values()) abort.abort(); + inFlight.clear(); + attachedTargets.clear(); + unsubscribeOnDetach(); + proxy.dispose(); + } + + return { handle, cancel, dispose }; +} diff --git a/clients/chrome-extension/background/relay-connection.ts b/clients/chrome-extension/background/relay-connection.ts new file mode 100644 index 00000000000..2a7549a16c8 --- /dev/null +++ b/clients/chrome-extension/background/relay-connection.ts @@ -0,0 +1,351 @@ +/** + * Relay WebSocket connection helper. + * + * Extracted from worker.ts so we can share the open/close/reconnect + * lifecycle between the two relay transports: + * + * - `self-hosted` — ws://127.0.0.1:<port>/v1/browser-relay, token minted + * by the local daemon (legacy path; default for back-compat). + * - `cloud` — wss://<gateway-host>/v1/browser-relay, token from + * the cloud OAuth flow (see cloud-auth.ts). + * + * The class only knows how to open the socket, forward incoming messages + * to the caller, and reconnect after unexpected closes. It does NOT parse + * relay messages — worker.ts owns the envelope dispatch (ExtensionCommand + * + host_browser_request via the PR 9 dispatcher) via the `onMessage` + * callback. + * + * This module also exports {@link postHostBrowserResult}, the relay-aware + * helper used by the host-browser dispatcher to ship CDP result envelopes + * back to the daemon.
In self-hosted mode the result is POSTed to the + * local `/v1/host-browser-result` HTTP endpoint; in cloud mode it would + * round-trip back through the gateway WebSocket — see the function + * docstring for the current Phase 2 behaviour. + */ + +import type { HostBrowserResultEnvelope } from './host-browser-dispatcher.js'; + +/** Reconnect backoff bounds mirror the legacy inline worker.ts values. */ +const RECONNECT_BASE_MS = 1_000; +const RECONNECT_MAX_MS = 30_000; + +/** WebSocket close codes that represent intentional, non-error closures. */ +const NORMAL_CLOSE_CODES = new Set([1000, 1001]); + +/** + * Connection mode with the corresponding base URL + bearer token. The + * base URL is normalised by {@link RelayConnection.buildUrl}: any + * `http(s)://` scheme is rewritten to `ws(s)://` and a trailing slash is + * stripped. Pass the daemon's HTTP origin for self-hosted mode and the + * cloud gateway's HTTPS origin for cloud mode — the class figures out + * the WebSocket scheme. + */ +export type RelayMode = + | { kind: 'self-hosted'; baseUrl: string; token: string | null } + | { kind: 'cloud'; baseUrl: string; token: string | null }; + +export interface RelayConnectionDeps { + /** + * Mode + token. The token is pre-fetched by the caller (so the caller + * can decide whether to skip the connection entirely when there's no + * token yet, e.g. before cloud sign-in or before self-hosted pairing). + */ + mode: RelayMode; + /** Invoked with the raw string payload for every incoming message. */ + onMessage: (data: string) => void; + /** Invoked when the socket transitions to OPEN. */ + onOpen: () => void; + /** Invoked when the socket closes (user-initiated or unexpected). */ + onClose: (code: number, reason: string) => void; + /** + * Optional: invoked right before a reconnect attempt is scheduled for + * an unexpected close. Callers use this to refresh stale tokens before + * the next `start()` attempt. 
If this returns a string, the new token + * replaces `mode.token` for the next URL build. + */ + onReconnect?: () => Promise; +} + +/** + * Long-lived WebSocket helper. One instance per live relay session — + * switching modes closes the current socket and constructs a new one. + */ +export class RelayConnection { + private ws: WebSocket | null = null; + private deps: RelayConnectionDeps; + private reconnectTimer: ReturnType | null = null; + private reconnectDelay = RECONNECT_BASE_MS; + private closedByCaller = false; + + constructor(deps: RelayConnectionDeps) { + this.deps = deps; + } + + /** + * Return the live connection mode. Callers must invoke this right + * before each use — after a reconnect-with-refresh cycle the + * underlying `deps.mode` is replaced with a brand new object holding + * the freshly minted token, and any caller that cached the result of + * a previous invocation would still be using the stale token. In + * particular, worker.ts's `dispatchHostBrowserResult` MUST pull the + * mode through this accessor per-POST so that result envelopes sent + * after a WebSocket drop authenticate with the new bearer token + * instead of silently 401/403ing. + * + * This is the ONLY public accessor for the mode — there is + * deliberately no `get mode()` getter, because a property-style + * access reads as a static field and invites callers to cache it. + */ + getCurrentMode(): RelayMode { + return this.deps.mode; + } + + /** Is the underlying socket currently in the OPEN readyState? */ + isOpen(): boolean { + return this.ws !== null && this.ws.readyState === WebSocket.OPEN; + } + + /** Begin (or resume) connecting. Idempotent while already connected. */ + start(): void { + this.closedByCaller = false; + this.reconnectDelay = RECONNECT_BASE_MS; + this.connect(); + } + + /** + * Swap the connection mode / token without destroying the class + * instance. The current socket is closed cleanly and a fresh one is + * opened for the new mode. 
Used by the popup's mode switcher. + */ + setMode(mode: RelayMode): void { + this.deps = { ...this.deps, mode }; + // Tear down the current socket without marking the caller as having + // closed us permanently — `start()` below resets `closedByCaller`. + if (this.reconnectTimer !== null) { + clearTimeout(this.reconnectTimer); + this.reconnectTimer = null; + } + if (this.ws) { + try { + this.ws.close(1000, 'mode switched'); + } catch { + /* ignore */ + } + this.ws = null; + } + this.start(); + } + + /** + * Send a raw string payload. No-op if the socket is not currently OPEN + * — matches the existing worker.ts semantics where heartbeats and + * responses silently drop when the socket is mid-reconnect. + */ + send(data: string): void { + if (this.ws && this.ws.readyState === WebSocket.OPEN) { + this.ws.send(data); + } + } + + /** + * Close the socket permanently. After this the connection will not + * reconnect on its own; call `start()` again to resume. + */ + close(code = 1000, reason = 'closed by caller'): void { + this.closedByCaller = true; + if (this.reconnectTimer !== null) { + clearTimeout(this.reconnectTimer); + this.reconnectTimer = null; + } + if (this.ws) { + try { + this.ws.close(code, reason); + } catch { + /* ignore */ + } + this.ws = null; + } + } + + // ── Internals ───────────────────────────────────────────────────── + + private connect(): void { + if (this.ws && (this.ws.readyState === WebSocket.OPEN || this.ws.readyState === WebSocket.CONNECTING)) { + return; + } + + const url = this.buildUrl(); + // Capture a local reference to the socket so that every listener + // can verify it is still the active one before mutating shared + // state. Without this, a `setMode()` that closes socket A and + // immediately opens socket B can get A's asynchronous close event + // delivered afterward — that stale event would otherwise clear the + // reference to B and schedule a spurious reconnect.
+ const ws = new WebSocket(url); + this.ws = ws; + + ws.addEventListener('open', () => { + if (this.ws !== ws) return; // stale event from a superseded socket + this.reconnectDelay = RECONNECT_BASE_MS; + this.deps.onOpen(); + }); + + ws.addEventListener('message', (event: MessageEvent) => { + if (this.ws !== ws) return; // stale event from a superseded socket + this.deps.onMessage(String(event.data)); + }); + + ws.addEventListener('close', (event: CloseEvent) => { + if (this.ws !== ws) return; // stale event from a superseded socket + const code = event.code; + const reason = event.reason; + this.ws = null; + this.deps.onClose(code, reason); + if (!this.closedByCaller) { + if (!NORMAL_CLOSE_CODES.has(code)) { + this.scheduleReconnectWithRefresh(); + } else { + this.scheduleReconnect(); + } + } + }); + + ws.addEventListener('error', () => { + if (this.ws !== ws) return; // stale event from a superseded socket + // A close event will follow — nothing to do here beyond letting + // the socket transition into CLOSING/CLOSED so we can reconnect. + }); + } + + /** Build the WebSocket URL from the current mode. */ + private buildUrl(): string { + const { mode } = this.deps; + const base = mode.baseUrl.replace(/\/$/, ''); + const wsBase = base.replace(/^http/, 'ws'); + const url = `${wsBase}/v1/browser-relay`; + if (mode.token) { + return `${url}?token=${encodeURIComponent(mode.token)}`; + } + return url; + } + + private scheduleReconnect(): void { + if (this.reconnectTimer !== null) return; + const delay = this.reconnectDelay; + this.reconnectTimer = setTimeout(() => { + this.reconnectTimer = null; + if (!this.closedByCaller) this.connect(); + }, delay); + this.reconnectDelay = Math.min(this.reconnectDelay * 2, RECONNECT_MAX_MS); + } + + /** + * Unexpected close path: give the caller a chance to refresh the + * token (e.g. the self-hosted daemon rotated its edge JWT, or the + * cloud OAuth flow expired) before the next connect attempt. 
+ */ + private scheduleReconnectWithRefresh(): void { + if (this.reconnectTimer !== null) return; + const delay = this.reconnectDelay; + this.reconnectTimer = setTimeout(async () => { + this.reconnectTimer = null; + if (this.closedByCaller) return; + if (this.deps.onReconnect) { + try { + const newToken = await this.deps.onReconnect(); + if (typeof newToken === 'string') { + this.deps = { + ...this.deps, + mode: { ...this.deps.mode, token: newToken }, + }; + } + } catch { + // Refresh failures fall through to a bare reconnect attempt — + // the server will reject the handshake and we'll loop. + } + } + if (!this.closedByCaller) this.connect(); + }, delay); + this.reconnectDelay = Math.min(this.reconnectDelay * 2, RECONNECT_MAX_MS); + } +} + +// ── host_browser result poster ───────────────────────────────────── +// +// The host-browser dispatcher needs a way to ship CDP result envelopes +// back to the daemon. The transport depends on the relay mode: +// +// - self-hosted: POST to the local daemon's +// `/v1/host-browser-result` endpoint, authenticated with the +// stored capability token. +// - cloud: send the envelope as a `host_browser_result` frame over +// the existing browser-relay WebSocket. The gateway proxies the +// frame straight through to the runtime — see +// `gateway/src/http/routes/browser-relay-websocket.ts`. (Phase 3 +// will land the runtime-side handler for inbound result frames; +// today the runtime drops them, but the cloud CDP path is +// feature-flagged off in Phase 2 so this is harmless.) + +/** + * Minimal subset of {@link RelayConnection} that {@link postHostBrowserResult} + * actually consumes. Used by tests to inject a fake without having to + * stand up a real WebSocket. 
+ * + * `getCurrentMode()` is intentionally part of the surface so callers + * like worker.ts's `dispatchHostBrowserResult` can read the LIVE mode + * (including any refreshed token) straight off the connection instead + * of relying on a module-level snapshot captured at connect time. + */ +export interface RelayConnectionLike { + isOpen(): boolean; + send(data: string): void; + getCurrentMode(): RelayMode; +} + +/** + * Ship a host_browser result envelope back to the daemon. + * + * In self-hosted mode this POSTs to `${mode.baseUrl}/v1/host-browser-result` + * with `Authorization: Bearer <token>`. In cloud mode it sends a + * `{ type: 'host_browser_result', ...result }` frame over the supplied + * relay connection. + * + * The cloud branch is a no-op (with a console.warn) when the connection + * is missing or not currently open. We deliberately do NOT throw — the + * dispatcher's error path catches and logs synchronously, but a thrown + * rejection here would bubble up to the service worker as an unhandled + * promise rejection. + */ +export async function postHostBrowserResult( + mode: RelayMode, + connection: RelayConnectionLike | null, + result: HostBrowserResultEnvelope, +): Promise { + if (mode.kind === 'cloud') { + if (!connection || !connection.isOpen()) { + console.warn( + '[vellum-relay] host-browser-result dropped: cloud relay not connected', + ); + return; + } + connection.send(JSON.stringify({ type: 'host_browser_result', ...result })); + return; + } + + // self-hosted: POST to the local daemon. The base URL is whatever + // `buildRelayModeConfig` resolved at connect time (usually + // `http://127.0.0.1:<port>`).
+ const headers: Record = { 'content-type': 'application/json' }; + if (mode.token) headers.authorization = `Bearer ${mode.token}`; + const url = `${mode.baseUrl.replace(/\/$/, '')}/v1/host-browser-result`; + const resp = await fetch(url, { + method: 'POST', + headers, + body: JSON.stringify(result), + }); + if (!resp.ok) { + console.warn( + `[vellum-relay] host-browser-result POST returned ${resp.status}`, + ); + } +} diff --git a/clients/chrome-extension/background/self-hosted-auth.ts b/clients/chrome-extension/background/self-hosted-auth.ts new file mode 100644 index 00000000000..623f601af8b --- /dev/null +++ b/clients/chrome-extension/background/self-hosted-auth.ts @@ -0,0 +1,222 @@ +/** + * Self-hosted capability-token bootstrap for the Vellum chrome extension. + * + * Spawns the native messaging helper registered under the host name + * `com.vellum.daemon`, asks it to exchange the calling extension's origin + * for a capability token via the assistant's `/v1/browser-extension-pair` + * endpoint, and persists the returned token in `chrome.storage.local`. + * + * This module is the self-hosted counterpart to `cloud-auth.ts`: cloud + * sign-in uses chrome.identity.launchWebAuthFlow against the Vellum + * gateway, while self-hosted pairing uses native messaging to talk to a + * locally running assistant without needing an external OAuth round-trip. + * Users with a local assistant can pair their extension entirely offline. + * + * The token is not yet wired into the relay WebSocket — that plumbing + * lands in PR 14 of the host-browser-proxy Phase 2 plan. This module is + * the storage + bootstrap state machine layer only. + * + * Wire format notes: the native helper sends + * `{ type: "token_response", token, expiresAt }` where `expiresAt` is an + * ISO 8601 string (per the /v1/browser-extension-pair response shape). 
+ * We parse it into an epoch-millis number here so the in-memory and + * on-disk representation matches `StoredCloudToken` and downstream code + * can rely on a single numeric expiry type across both transports. + */ + +export interface StoredLocalToken { + token: string; + expiresAt: number; // ms since epoch + guardianId: string; +} + +const STORAGE_KEY = 'vellum.localCapabilityToken'; +const NATIVE_HOST_NAME = 'com.vellum.daemon'; +const DEFAULT_BOOTSTRAP_TIMEOUT_MS = 5_000; + +export interface BootstrapLocalTokenOptions { + /** + * Override the native-messaging timeout. Exposed primarily so tests can + * run the timeout path without having to wait five real seconds; callers + * in the extension itself should rely on the default. + */ + timeoutMs?: number; +} + +export async function getStoredLocalToken(): Promise { + const result = await chrome.storage.local.get(STORAGE_KEY); + const raw = result[STORAGE_KEY]; + if (!raw || typeof raw !== 'object') return null; + const token = raw as StoredLocalToken; + if ( + typeof token.token !== 'string' || + typeof token.expiresAt !== 'number' || + typeof token.guardianId !== 'string' + ) { + return null; + } + if (token.expiresAt <= Date.now()) return null; + return token; +} + +export async function clearLocalToken(): Promise { + await chrome.storage.local.remove(STORAGE_KEY); +} + +async function persistLocalToken(token: StoredLocalToken): Promise { + await chrome.storage.local.set({ [STORAGE_KEY]: token }); +} + +/** + * Parse the helper's `expiresAt` field into an epoch-millis number. + * + * The native helper echoes whatever the assistant's /v1/browser-extension-pair + * endpoint returned, which is an ISO 8601 string per PR 11. We tolerate a + * numeric value as well (belt and braces) so a future helper change that + * forwards a raw number doesn't break the extension. 
+ */ +function parseExpiresAt(raw: unknown): number | null { + if (typeof raw === 'number' && Number.isFinite(raw) && raw > 0) { + return raw; + } + if (typeof raw === 'string' && raw.length > 0) { + const parsed = Date.parse(raw); + if (Number.isFinite(parsed) && parsed > 0) return parsed; + } + return null; +} + +/** + * Spawns the native messaging helper via `chrome.runtime.connectNative`, + * posts a `request_token` frame, awaits the helper's `token_response`, + * and persists the returned capability token. + * + * Error handling: + * - If the helper emits `{ type: "error", message }`, rejects with that + * message. The helper uses this shape for allowlist violations, + * unreachable assistant, and malformed responses from the pair endpoint. + * - If the port disconnects before a response arrives, rejects with the + * `chrome.runtime.lastError` message (Chrome surfaces native-messaging + * spawn failures through this channel — e.g. the host manifest isn't + * installed, or the binary exited non-zero before writing a frame). + * - If no frame arrives within `DEFAULT_BOOTSTRAP_TIMEOUT_MS`, rejects + * with a timeout error and force-disconnects the port so the helper + * process doesn't leak. + */ +export async function bootstrapLocalToken( + options: BootstrapLocalTokenOptions = {}, +): Promise { + const timeoutMs = options.timeoutMs ?? DEFAULT_BOOTSTRAP_TIMEOUT_MS; + return new Promise((resolve, reject) => { + // `settled` is flipped synchronously the moment we observe a decisive + // frame (token_response / error / timeout / disconnect) so that a + // racing onDisconnect — Chrome sometimes closes the native port the + // instant the helper exits, even if we've already received a valid + // token frame — can't win the race and reject a successful pairing. + // + // Critically, for the token_response happy path we mark `settled` + // BEFORE awaiting `persistLocalToken`. 
If we waited until the storage + // write resolved, an onDisconnect firing during that microtask would + // still see `settled === false` and reject the promise despite having + // a valid in-memory token. + let settled = false; + const port = chrome.runtime.connectNative(NATIVE_HOST_NAME); + + const cleanup = (): void => { + clearTimeout(timer); + try { + port.disconnect(); + } catch { + // Chrome may have already torn the port down — ignore. + } + }; + + const finish = (fn: () => void): void => { + if (settled) return; + settled = true; + cleanup(); + fn(); + }; + + const timer: ReturnType = setTimeout(() => { + finish(() => reject(new Error('native messaging timeout'))); + }, timeoutMs); + + port.onMessage.addListener((msg: unknown) => { + if (settled) return; + if (!msg || typeof msg !== 'object') return; + const frame = msg as { + type?: unknown; + token?: unknown; + expiresAt?: unknown; + guardianId?: unknown; + message?: unknown; + }; + + if (frame.type === 'token_response') { + const expiresAt = parseExpiresAt(frame.expiresAt); + if ( + typeof frame.token !== 'string' || + expiresAt === null || + typeof frame.guardianId !== 'string' + ) { + finish(() => + reject(new Error('native messaging returned malformed token_response')), + ); + return; + } + const stored: StoredLocalToken = { + token: frame.token, + expiresAt, + guardianId: frame.guardianId, + }; + + // Mark settled + tear down the port SYNCHRONOUSLY so a racing + // onDisconnect listener can't reject the promise after we've + // already received a valid token. The persistence write below + // is awaited afterwards, but it no longer gates `settled`. + settled = true; + cleanup(); + + // Persist asynchronously. If the storage write fails, we log + // the error and resolve with the in-memory token anyway — the + // caller can still use it for the current session even if we + // couldn't durably save it. 
This also matches the comment + // above: a storage failure shouldn't block the caller from + // getting a token they just successfully negotiated. + persistLocalToken(stored).then( + () => resolve(stored), + (err: unknown) => { + const detail = err instanceof Error ? err.message : String(err); + console.warn( + `[vellum-relay] failed to persist local capability token: ${detail}`, + ); + resolve(stored); + }, + ); + return; + } + + if (frame.type === 'error') { + const message = typeof frame.message === 'string' ? frame.message : 'native messaging error'; + finish(() => reject(new Error(message))); + return; + } + + // Ignore any unrecognised frame types — the helper currently only + // emits `token_response` and `error`, but tolerating unknowns means + // a future protocol extension won't accidentally trip this path. + }); + + port.onDisconnect.addListener(() => { + if (settled) return; + const lastError = chrome.runtime.lastError; + const message = lastError?.message ?? 'native messaging disconnected before response'; + // `finish` will call port.disconnect() again, but that's a no-op + // after Chrome has already torn the port down on its side. + finish(() => reject(new Error(message))); + }); + + port.postMessage({ type: 'request_token' }); + }); +} diff --git a/clients/chrome-extension/background/worker.ts b/clients/chrome-extension/background/worker.ts index 6da667b313e..3095cd325bb 100644 --- a/clients/chrome-extension/background/worker.ts +++ b/clients/chrome-extension/background/worker.ts @@ -1,27 +1,165 @@ /** * Chrome MV3 service worker — browser-relay bridge. * - * Connects to ws://127.0.0.1:/v1/browser-relay and dispatches - * ExtensionCommands from the server to browser APIs, sending back - * ExtensionResponses. 
+ * Connects to either + * - the local daemon's browser-relay endpoint + * (`ws://127.0.0.1:<port>/v1/browser-relay`), or + * - the cloud gateway's browser-relay endpoint + * (`wss://<gateway-host>/v1/browser-relay`) + * + * depending on the `vellum.relayMode` key in chrome.storage.local + * (default `"self-hosted"` for back-compat). Both transports share the + * same envelope vocabulary — the choice is strictly about where the + * socket points and which token is presented on the handshake. + * + * Once connected, the worker dispatches incoming server messages: + * - `host_browser_request` / `host_browser_cancel` envelopes are + * routed to the CDP proxy dispatcher (Phase 2 PR 9, gated behind + * the `vellum.cdpProxyEnabled` feature flag). + * - Every other payload is treated as a legacy `ExtensionCommand` + * and dispatched to the existing browser-API handlers. */ import type { ExtensionCommand, ExtensionResponse, ExtensionHeartbeat } from '../../../assistant/src/browser-extension-relay/protocol.js'; +import { + signInCloud, + getStoredToken as getStoredCloudToken, + type CloudAuthConfig, + type StoredCloudToken, +} from './cloud-auth.js'; +import { + bootstrapLocalToken, + type StoredLocalToken, +} from './self-hosted-auth.js'; +import { + createHostBrowserDispatcher, + type HostBrowserDispatcher, + type HostBrowserRequestEnvelope, + type HostBrowserCancelEnvelope, + type HostBrowserResultEnvelope, +} from './host-browser-dispatcher.js'; +import { + RelayConnection, + postHostBrowserResult, + type RelayMode, +} from './relay-connection.js'; + +// Cloud OAuth defaults — kept here so the popup can stay a thin client and the +// service worker is the single owner of the launchWebAuthFlow lifecycle. This +// avoids the MV3 popup teardown race where closing the popup mid-auth kills +// the awaited promise before the token is persisted.
+const CLOUD_GATEWAY_BASE_URL = 'https://api.vellum.ai'; +const CLOUD_OAUTH_CLIENT_ID = 'vellum-chrome-extension'; const DEFAULT_RELAY_PORT = 7830; const HEARTBEAT_INTERVAL_MS = 30_000; -const RECONNECT_BASE_MS = 1_000; -const RECONNECT_MAX_MS = 30_000; const EXTENSION_VERSION = chrome.runtime.getManifest().version; -let ws: WebSocket | null = null; -let reconnectDelay = RECONNECT_BASE_MS; +// ── Mode selection (Phase 2 PR 14) ───────────────────────────────── +// +// Existing installs have no `vellum.relayMode` key and must keep using +// the local daemon transport. New installs can flip to cloud via the +// popup radio group. +const RELAY_MODE_KEY = 'vellum.relayMode'; +type RelayModeKind = 'self-hosted' | 'cloud'; + +function isRelayModeKind(v: unknown): v is RelayModeKind { + return v === 'self-hosted' || v === 'cloud'; +} + +let relayMode: RelayModeKind = 'self-hosted'; +let relayConnection: RelayConnection | null = null; let heartbeatTimer: ReturnType | null = null; let shouldConnect = false; -/** WebSocket close codes that represent intentional, non-error closures. */ -const NORMAL_CLOSE_CODES = new Set([1000, 1001]); +// ── Host browser dispatcher (Phase 2 PR 9) ────────────────────────── +// +// Feature-flagged behind `vellum.cdpProxyEnabled` in chrome.storage.local. +// When the flag is off (default), incoming `host_browser_request` / +// `host_browser_cancel` envelopes are ignored here and the legacy +// ExtensionCommand handlers below service all browser tool calls exactly +// as before. Phase 3 will flip the default and delete the legacy path. +const CDP_PROXY_ENABLED_KEY = 'vellum.cdpProxyEnabled'; + +let cdpProxyEnabled = false; + +async function resolveHostBrowserTarget( + cdpSessionId: string | undefined, +): Promise<{ tabId?: number; targetId?: string }> { + // When the daemon side has an explicit session id (e.g. a flat child + // session returned from a prior Target.attachToTarget) we route the + // command by targetId. 
Otherwise we fall back to the most recently + // active tab in the focused window — matching the implicit target + // selection the legacy ExtensionCommand handlers used. + if (cdpSessionId) { + return { targetId: cdpSessionId }; + } + const [activeTab] = await chrome.tabs.query({ active: true, lastFocusedWindow: true }); + if (activeTab?.id === undefined) { + throw new Error('No active tab available to resolve host_browser target'); + } + return { tabId: activeTab.id }; +} + +/** + * Bridge the host-browser dispatcher to the relay-aware + * {@link postHostBrowserResult} helper. + * + * The happy path pulls the current mode straight off the live + * {@link RelayConnection} via `getCurrentMode()`. This is load-bearing: + * when `scheduleReconnectWithRefresh` fires after a WebSocket drop, it + * mints a fresh token and replaces `deps.mode` with a brand new object. + * Reading via the accessor on every dispatch guarantees the next result + * POST uses the freshly minted bearer token — a captured snapshot would + * silently 401/403 forever. + * + * When no relay connection exists yet (a legacy ExtensionCommand path + * firing before `connect()` ran, or a stale result arriving after + * `disconnect()`), we fall back per the configured relay mode: + * + * - `self-hosted`: POST directly to the local daemon using live + * creds resolved from storage, matching pre-Phase 2 behaviour. + * - `cloud`: warn and drop the envelope. POSTing to localhost in + * cloud mode would always fail, and we have no WebSocket to + * round-trip through without an active connection. + */ +async function dispatchHostBrowserResult( + result: HostBrowserResultEnvelope, +): Promise { + if (relayConnection) { + // Read the live mode from the active connection so that + // reconnect-with-refresh token updates propagate to result POSTs + // automatically. 
+ const currentMode = relayConnection.getCurrentMode(); + return postHostBrowserResult(currentMode, relayConnection, result); + } + + // Fallback path: no active connection (legacy ExtensionCommand path + // before `connect()` runs, or a stale result arriving after + // `disconnect()`). + if (relayMode === 'cloud') { + console.warn( + '[vellum-relay] host_browser_result dropped: cloud mode but relay not connected', + ); + return; + } + + // Self-hosted fallback: POST directly to the local daemon using live + // creds. + const [token, port] = await Promise.all([getBearerToken(), getRelayPort()]); + const fallbackMode: RelayMode = { + kind: 'self-hosted', + baseUrl: `http://127.0.0.1:${port}`, + token, + }; + return postHostBrowserResult(fallbackMode, null, result); +} + +const hostBrowserDispatcher: HostBrowserDispatcher = createHostBrowserDispatcher({ + resolveTarget: resolveHostBrowserTarget, + postResult: dispatchHostBrowserResult, +}); // ── Storage helpers ───────────────────────────────────────────────── @@ -61,68 +199,167 @@ async function refreshToken(): Promise { } } -// ── WebSocket lifecycle ───────────────────────────────────────────── +// ── Relay connection lifecycle ────────────────────────────────────── -async function connect(): Promise { - if (ws && (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING)) { - return; - } +async function loadRelayMode(): Promise { + const result = await chrome.storage.local.get(RELAY_MODE_KEY); + const stored = result[RELAY_MODE_KEY]; + return isRelayModeKind(stored) ? stored : 'self-hosted'; +} +async function buildRelayModeConfig(kind: RelayModeKind): Promise { + if (kind === 'cloud') { + const stored = await getStoredCloudToken(); + return { + kind: 'cloud', + baseUrl: CLOUD_GATEWAY_BASE_URL, + token: stored?.token ?? null, + }; + } + // Self-hosted: re-use the existing local-token flow. The plan explicitly + // defers the switch to PR 13's getStoredLocalToken() to a follow-up. 
const [token, port] = await Promise.all([getBearerToken(), getRelayPort()]); - const relayUrlBase = `ws://127.0.0.1:${port}/v1/browser-relay`; - const url = token ? `${relayUrlBase}?token=${encodeURIComponent(token)}` : relayUrlBase; - - ws = new WebSocket(url); + return { + kind: 'self-hosted', + baseUrl: `http://127.0.0.1:${port}`, + token, + }; +} - ws.addEventListener('open', () => { - console.log('[vellum-relay] Connected to relay server'); - reconnectDelay = RECONNECT_BASE_MS; - startHeartbeat(); +/** + * Wire a RelayConnection up with the worker's message/open/close + * callbacks. Does NOT start it. + */ +function createRelayConnection(mode: RelayMode): RelayConnection { + return new RelayConnection({ + mode, + onOpen: () => { + console.log(`[vellum-relay] Connected (${mode.kind})`); + startHeartbeat(); + }, + onMessage: (data) => { + // Fire-and-forget dispatch — wrap with .catch so a future refactor + // can't leak an unhandled rejection into the service worker and + // tear down the relay socket unexpectedly. + void handleServerMessage(data).catch((err) => { + console.warn('[vellum-relay] handleServerMessage failed', err); + }); + }, + onClose: (code, reason) => { + console.log(`[vellum-relay] Disconnected (code=${code}, reason=${reason || 'n/a'})`); + stopHeartbeat(); + }, + onReconnect: async () => { + // Self-hosted: attempt to mint a fresh gateway token. Cloud: no-op + // for now — the cloud token is stored independently via OAuth and + // we'd rather surface the failure to the user than silently loop. + if (mode.kind === 'self-hosted') { + const ok = await refreshToken(); + if (ok) { + const refreshed = await getBearerToken(); + return refreshed; + } + } + }, }); +} - ws.addEventListener('message', (event) => { - handleServerMessage(event.data as string); - }); +/** + * Thrown by `connect()` when the selected relay mode has no usable + * token yet. Callers (e.g. 
the popup connect handler) surface the + * message verbatim to the user so they can take action — signing in + * to cloud or re-pairing the local daemon — instead of seeing a + * silent no-op after pressing "Connect". + */ +class MissingTokenError extends Error { + constructor(message: string) { + super(message); + this.name = 'MissingTokenError'; + } +} - ws.addEventListener('close', (event) => { - console.log(`[vellum-relay] Disconnected (code=${event.code}). Reconnecting in ${reconnectDelay}ms…`); - stopHeartbeat(); - ws = null; - if (shouldConnect) { - if (!NORMAL_CLOSE_CODES.has(event.code)) { - // Any unexpected close (including 1006 from failed HTTP 401 handshakes, - // 1008, 4001, etc.) — attempt a token refresh before reconnecting. - refreshToken().then(() => scheduleReconnect()); - } else { - scheduleReconnect(); - } - } - }); +function missingTokenMessage(kind: RelayModeKind): string { + if (kind === 'cloud') { + return 'Sign in with Vellum (cloud) before connecting'; + } + return 'Pair the Vellum assistant (self-hosted) before connecting'; +} - ws.addEventListener('error', () => { - // close event will follow; just log - console.warn('[vellum-relay] WebSocket error'); - }); +async function connect(): Promise { + if (relayConnection && relayConnection.isOpen()) return; + // Re-read the live relay mode from storage at connect time. The + // module-level `relayMode` variable is only refreshed asynchronously + // via chrome.storage.onChanged, so trusting it races against a popup + // that toggles the radio and immediately clicks Connect. Reading from + // storage here makes the connect flow deterministic. + // + // The module-level `relayMode` is still updated to match so other code + // paths (status queries, disconnect, result routing) stay consistent + // with the mode we're about to connect with. 
+ const liveMode = await loadRelayMode(); + relayMode = liveMode; + const mode = await buildRelayModeConfig(liveMode); + if (!mode.token) { + const msg = missingTokenMessage(mode.kind); + console.warn(`[vellum-relay] ${msg}`); + throw new MissingTokenError(msg); + } + // Tear down any stale instance before constructing a new one. This + // keeps the close/reconnect lifecycle simple — one RelayConnection + // per live socket, no hidden state carried across mode switches. + if (relayConnection) { + relayConnection.close(1000, 'reconfigured'); + } + relayConnection = createRelayConnection(mode); + relayConnection.start(); +} + +function disconnect(): void { + stopHeartbeat(); + if (relayConnection) { + relayConnection.close(1000, 'User disconnected'); + relayConnection = null; + } } -function scheduleReconnect(): void { - setTimeout(() => { - if (shouldConnect) connect(); - }, reconnectDelay); - reconnectDelay = Math.min(reconnectDelay * 2, RECONNECT_MAX_MS); +/** + * Handle a runtime switch of `vellum.relayMode` (e.g. the popup radio + * group flipped). Closes any current socket and opens a new one in the + * new mode — see plan PR 14 step 2. + */ +async function applyModeChange(newKind: RelayModeKind): Promise { + if (newKind === relayMode) return; + relayMode = newKind; + if (!shouldConnect) return; + disconnect(); + try { + await connect(); + } catch (err) { + // The user switched modes before signing in / pairing. Leave the + // extension disconnected and let the next user-initiated connect + // bubble the error up through the popup message handler. 
+ if (err instanceof MissingTokenError) { + shouldConnect = false; + console.warn( + `[vellum-relay] Mode switch to ${newKind} left disconnected: ${err.message}`, + ); + return; + } + throw err; + } } function startHeartbeat(): void { stopHeartbeat(); heartbeatTimer = setInterval(async () => { - if (!ws || ws.readyState !== WebSocket.OPEN) return; + if (!relayConnection || !relayConnection.isOpen()) return; const tabs = await chrome.tabs.query({}); const heartbeat: ExtensionHeartbeat = { type: 'heartbeat', extensionVersion: EXTENSION_VERSION, connectedTabs: tabs.length, }; - ws.send(JSON.stringify(heartbeat)); + relayConnection.send(JSON.stringify(heartbeat)); }, HEARTBEAT_INTERVAL_MS); } @@ -134,22 +371,48 @@ function stopHeartbeat(): void { } function sendResponse(response: ExtensionResponse): void { - if (ws && ws.readyState === WebSocket.OPEN) { - ws.send(JSON.stringify(response)); + if (relayConnection && relayConnection.isOpen()) { + relayConnection.send(JSON.stringify(response)); } } // ── Command dispatch ──────────────────────────────────────────────── async function handleServerMessage(raw: string): Promise { - let cmd: ExtensionCommand; + let parsed: unknown; try { - cmd = JSON.parse(raw) as ExtensionCommand; + parsed = JSON.parse(raw); } catch { console.warn('[vellum-relay] Failed to parse server message'); return; } + // Phase 2 PR 9: host_browser_* envelopes are dispatched via the CDP proxy + // only when the feature flag is on. With the flag off we return early and + // let the daemon's host-browser-proxy time out gracefully — the envelope + // is NOT forwarded to the legacy ExtensionCommand dispatch because its + // shape is incompatible. 
+ if ( + parsed !== null && + typeof parsed === 'object' && + 'type' in parsed && + typeof (parsed as { type: unknown }).type === 'string' + ) { + const envelopeType = (parsed as { type: string }).type; + if (envelopeType === 'host_browser_request') { + if (!cdpProxyEnabled) return; + await hostBrowserDispatcher.handle(parsed as HostBrowserRequestEnvelope); + return; + } + if (envelopeType === 'host_browser_cancel') { + if (cdpProxyEnabled) { + hostBrowserDispatcher.cancel(parsed as HostBrowserCancelEnvelope); + } + return; + } + } + + const cmd = parsed as ExtensionCommand; try { const result = await dispatch(cmd); sendResponse({ id: cmd.id, success: true, ...result }); @@ -325,32 +588,122 @@ async function handleScreenshot(cmd: ExtensionCommand): Promise // ── Extension message listener (from popup) ───────────────────────── -chrome.runtime.onMessage.addListener((message, _sender, sendResponse) => { +chrome.runtime.onMessage.addListener((message, _sender, sendResponseFn) => { if (message.type === 'connect') { shouldConnect = true; - connect().then(() => sendResponse({ ok: true })).catch((err) => sendResponse({ ok: false, error: String(err) })); + connect() + .then(() => sendResponseFn({ ok: true })) + .catch((err) => { + // Reset shouldConnect so a subsequent storage change or + // bootstrap doesn't silently retry a doomed connect. The user + // will press Connect again after signing in / pairing. + shouldConnect = false; + const errorMessage = err instanceof Error ? 
err.message : String(err); + sendResponseFn({ ok: false, error: errorMessage }); + }); return true; // async } if (message.type === 'disconnect') { shouldConnect = false; - ws?.close(1000, 'User disconnected'); - sendResponse({ ok: true }); + disconnect(); + sendResponseFn({ ok: true }); return false; } if (message.type === 'get_status') { - sendResponse({ - connected: ws !== null && ws.readyState === WebSocket.OPEN, + sendResponseFn({ + connected: relayConnection !== null && relayConnection.isOpen(), + mode: relayMode, }); return false; } + if (message.type === 'cloud-auth-sign-in') { + // Run the OAuth flow in the service worker — not the popup — so the + // awaited promise survives the popup losing focus during the Chrome + // identity window. The popup just awaits this message response. + const config: CloudAuthConfig = { + gatewayBaseUrl: + typeof message.gatewayBaseUrl === 'string' ? message.gatewayBaseUrl : CLOUD_GATEWAY_BASE_URL, + clientId: + typeof message.clientId === 'string' ? message.clientId : CLOUD_OAUTH_CLIENT_ID, + }; + signInCloud(config) + .then((stored: StoredCloudToken) => sendResponseFn({ ok: true, token: stored })) + .catch((err) => sendResponseFn({ ok: false, error: err instanceof Error ? err.message : String(err) })); + return true; // async + } + if (message.type === 'self-hosted-pair') { + // Mirror the cloud-auth-sign-in pattern: run the native-messaging + // bootstrap in the service worker so the popup closing mid-pair + // can't tear down the awaited promise before the token is persisted. + // chrome.runtime.connectNative also requires the "nativeMessaging" + // permission, which is declared in manifest.json. + // + // IMPORTANT: use `sendResponseFn` (the chrome.runtime.onMessage + // callback) — NOT the module-level `sendResponse` helper, which + // forwards to the WebSocket relay and would leave the popup's + // requestLocalPair() promise hanging forever. 
+ bootstrapLocalToken() + .then((stored: StoredLocalToken) => sendResponseFn({ ok: true, token: stored })) + .catch((err) => sendResponseFn({ ok: false, error: err instanceof Error ? err.message : String(err) })); + return true; // async + } }); // Auto-connect on service worker start if previously connected. -// Refresh the token first so we don't reconnect with stale credentials. -chrome.storage.local.get('autoConnect').then(async (result) => { - if (result.autoConnect === true) { - shouldConnect = true; +// Refresh the self-hosted token first so we don't reconnect with stale +// credentials — cloud-mode auto-connect just reads the stored OAuth +// token and trusts the caller to re-sign in if it's expired. +async function bootstrap(): Promise { + relayMode = await loadRelayMode(); + const { autoConnect } = await chrome.storage.local.get('autoConnect'); + if (autoConnect !== true) return; + shouldConnect = true; + if (relayMode === 'self-hosted') { await refreshToken(); - connect(); + } + try { + await connect(); + } catch (err) { + // A missing token at auto-connect time is not a hard failure — + // the user will see the disconnected state in the popup and can + // sign in / pair to try again. Log and move on. + if (err instanceof MissingTokenError) { + shouldConnect = false; + console.warn(`[vellum-relay] Skipping auto-connect: ${err.message}`); + return; + } + throw err; + } +} + +bootstrap(); + +// Load the CDP proxy feature flag at startup. Missing / non-boolean values +// are treated as false so existing deployments exhibit no behavior change. +chrome.storage.local.get(CDP_PROXY_ENABLED_KEY).then((result) => { + const value = result[CDP_PROXY_ENABLED_KEY]; + cdpProxyEnabled = value === true; + if (cdpProxyEnabled) { + console.log('[vellum-relay] CDP proxy enabled (beta)'); + } +}); + +// Keep feature flag + relay mode live-updatable from the popup without +// requiring the service worker to restart. 
+chrome.storage.onChanged.addListener((changes, areaName) => { + if (areaName !== 'local') return; + if (CDP_PROXY_ENABLED_KEY in changes) { + const newValue = changes[CDP_PROXY_ENABLED_KEY]?.newValue; + cdpProxyEnabled = newValue === true; + console.log( + `[vellum-relay] CDP proxy feature flag updated: ${cdpProxyEnabled}`, + ); + } + if (RELAY_MODE_KEY in changes) { + const newValue = changes[RELAY_MODE_KEY]?.newValue; + if (isRelayModeKind(newValue)) { + console.log(`[vellum-relay] Relay mode updated: ${newValue}`); + void applyModeChange(newValue); + } } }); diff --git a/clients/chrome-extension/manifest.json b/clients/chrome-extension/manifest.json index dd555c25676..680d17b417b 100644 --- a/clients/chrome-extension/manifest.json +++ b/clients/chrome-extension/manifest.json @@ -5,12 +5,14 @@ "description": "Bridges the Vellum assistant to your live browser session — no CDP, no spoofing.", "permissions": [ - "tabs", "activeTab", - "scripting", "cookies", + "debugger", + "identity", + "nativeMessaging", + "scripting", "storage", - "debugger" + "tabs" ], "host_permissions": [ "" diff --git a/clients/chrome-extension/popup/popup.html b/clients/chrome-extension/popup/popup.html index aad35a18b98..46e3217c10b 100644 --- a/clients/chrome-extension/popup/popup.html +++ b/clients/chrome-extension/popup/popup.html @@ -95,6 +95,96 @@ } #btn-disconnect:hover:not(:disabled) { background: #e5e7eb; } + .divider { + height: 1px; + background: #e5e7eb; + margin: 16px 0 14px 0; + } + + .section-label { + font-size: 11px; + font-weight: 600; + color: #6b7280; + text-transform: uppercase; + letter-spacing: 0.04em; + margin-bottom: 8px; + } + + #btn-cloud-signin { + width: 100%; + background: #111827; + color: #fff; + margin-bottom: 8px; + } + #btn-cloud-signin:hover:not(:disabled) { background: #1f2937; } + + #btn-pair-local { + width: 100%; + background: #0f766e; + color: #fff; + margin-bottom: 8px; + } + #btn-pair-local:hover:not(:disabled) { background: #115e59; } + + 
.cloud-status { + font-size: 12px; + color: #4b5563; + margin-bottom: 4px; + word-break: break-all; + } + .cloud-status.signed-in { color: #0f766e; } + + .local-status { + font-size: 12px; + color: #4b5563; + margin-bottom: 4px; + word-break: break-all; + } + .local-status.paired { color: #0f766e; } + .local-status.error { color: #ef4444; } + + .beta-toggle { + display: flex; + align-items: center; + gap: 8px; + font-size: 12px; + color: #4b5563; + margin-top: 10px; + cursor: pointer; + user-select: none; + } + .beta-toggle input[type="checkbox"] { + margin: 0; + cursor: pointer; + } + + .mode-group { + margin-bottom: 14px; + } + + .mode-radio-row { + display: flex; + gap: 14px; + margin-top: 4px; + } + + .mode-radio-row label { + display: flex; + align-items: center; + gap: 6px; + font-size: 12px; + font-weight: 500; + color: #374151; + cursor: pointer; + user-select: none; + margin-bottom: 0; + } + + .mode-radio-row input[type="radio"] { + margin: 0; + cursor: pointer; + } + .hint { font-size: 11px; color: #9ca3af; @@ -135,6 +225,20 @@

Vellum Relay

+
+ +
+ + +
+
+
@@ -163,6 +267,23 @@

Vellum Relay

Token is auto-fetched from the local gateway. Port defaults to 7830.

+
+ + +

Not paired

+ + +
+ + +

Not signed in

+ + + + diff --git a/clients/chrome-extension/popup/popup.ts b/clients/chrome-extension/popup/popup.ts index c264360b7f4..9270f5aaa58 100644 --- a/clients/chrome-extension/popup/popup.ts +++ b/clients/chrome-extension/popup/popup.ts @@ -3,8 +3,22 @@ * * Auto-fetches a bearer token from the local gateway on Connect. * Falls back to manual token entry if the gateway is unreachable. + * + * Also exposes a "Sign in with Vellum (cloud)" button. The actual OAuth + * flow runs in the background service worker (see worker.ts) — the popup + * only sends a message asking the worker to start it. This avoids the + * MV3 popup teardown race where closing the popup mid-auth would kill + * the awaited launchWebAuthFlow promise before the token was persisted. + * Cloud sign-in and self-hosted token entry coexist — they represent + * the two possible relay transports. */ +import { getStoredToken, type StoredCloudToken } from '../background/cloud-auth.js'; +import { + getStoredLocalToken, + type StoredLocalToken, +} from '../background/self-hosted-auth.js'; + const DEFAULT_RELAY_PORT = 7830; const tokenInput = document.getElementById('token-input') as HTMLInputElement; @@ -16,6 +30,17 @@ const statusText = document.getElementById('status-text') as HTMLParagraphElemen const errorText = document.getElementById('error-text') as HTMLParagraphElement; const manualToggle = document.getElementById('manual-toggle') as HTMLButtonElement; const tokenGroup = document.getElementById('token-group') as HTMLDivElement; +const btnCloudSignIn = document.getElementById('btn-cloud-signin') as HTMLButtonElement; +const cloudStatus = document.getElementById('cloud-status') as HTMLParagraphElement; +const btnPairLocal = document.getElementById('btn-pair-local') as HTMLButtonElement; +const localStatus = document.getElementById('local-status') as HTMLParagraphElement; +const cdpProxyToggle = document.getElementById('cdp-proxy-toggle') as HTMLInputElement; +const modeSelfHosted = 
document.getElementById('mode-self-hosted') as HTMLInputElement; +const modeCloud = document.getElementById('mode-cloud') as HTMLInputElement; + +const CDP_PROXY_ENABLED_KEY = 'vellum.cdpProxyEnabled'; +const RELAY_MODE_KEY = 'vellum.relayMode'; +type RelayModeKind = 'self-hosted' | 'cloud'; let manualMode = false; @@ -91,13 +116,32 @@ btnConnect.addEventListener('click', async () => { errorText.style.display = 'none'; + // Read the current relay mode so we know whether to auto-fetch a local + // daemon token. In cloud mode the worker uses the stored cloud token + // (vellum.cloudAuthToken) directly, so the popup must NOT try to hit + // localhost — a cloud-only user may not have a local assistant running. + // + // We prefer the radio button's checked state as a tiebreaker: if the + // user just toggled the radio, the async chrome.storage.local.set from + // handleModeChange() may not have landed yet. The DOM is the source of + // truth for the user's current intent. + const modeStorage = await chrome.storage.local.get(RELAY_MODE_KEY); + const storedMode = modeStorage[RELAY_MODE_KEY]; + const relayMode: RelayModeKind = modeCloud.checked + ? 'cloud' + : modeSelfHosted.checked + ? 'self-hosted' + : storedMode === 'cloud' + ? 'cloud' + : 'self-hosted'; + // Only honour the manual token input when the user has explicitly revealed - // it. When manual mode is hidden, always auto-fetch a fresh token from the - // gateway so we never silently reuse an expired JWT that was pre-loaded from - // storage. + // it. When manual mode is hidden and we're in self-hosted mode, auto-fetch + // a fresh token from the gateway so we never silently reuse an expired JWT + // that was pre-loaded from storage. let token = manualMode ? 
tokenInput.value.trim() : ''; - if (!token) { + if (!token && relayMode === 'self-hosted') { try { btnConnect.disabled = true; statusText.textContent = 'Fetching token…'; @@ -110,6 +154,10 @@ btnConnect.addEventListener('click', async () => { } } + // In cloud mode with no manual token we proceed with no bearerToken — + // the worker reads vellum.cloudAuthToken from chrome.storage when it + // builds the relay mode config in buildRelayModeConfig(). + if (token) storageUpdate.bearerToken = token; if (portInput.value.trim()) { storageUpdate.relayPort = port; @@ -143,3 +191,191 @@ btnDisconnect.addEventListener('click', () => { setConnected(false); }); }); + +// ── Self-hosted native-messaging pairing (new in Phase 2 PR 13) ───── +// +// Pairing runs the local native messaging helper (com.vellum.daemon), +// which POSTs the extension's origin to the assistant's +// `/v1/browser-extension-pair` endpoint and returns a capability token. +// The token is persisted in chrome.storage.local under +// `vellum.localCapabilityToken`. It is NOT yet used on any WebSocket — +// PR 14 will read it when opening the relay connection in self-hosted +// mode. + +function setLocalStatus(text: string, state: 'neutral' | 'paired' | 'error'): void { + localStatus.textContent = text; + localStatus.classList.remove('paired', 'error'); + if (state !== 'neutral') localStatus.classList.add(state); +} + +function formatLocalTokenStatus(token: StoredLocalToken): string { + const expiresDate = new Date(token.expiresAt); + const expiresStr = Number.isFinite(token.expiresAt) + ? 
expiresDate.toLocaleString() + : 'unknown'; + return `Paired as guardian:${token.guardianId} (expires ${expiresStr})`; +} + +async function refreshLocalStatus(): Promise { + try { + const existing = await getStoredLocalToken(); + if (existing) { + setLocalStatus(formatLocalTokenStatus(existing), 'paired'); + } else { + setLocalStatus('Not paired', 'neutral'); + } + } catch (err) { + setLocalStatus( + `Error: ${err instanceof Error ? err.message : String(err)}`, + 'error', + ); + } +} + +interface LocalPairResponse { + ok: boolean; + token?: StoredLocalToken; + error?: string; +} + +function requestLocalPair(): Promise { + return new Promise((resolve) => { + chrome.runtime.sendMessage({ type: 'self-hosted-pair' }, (response: LocalPairResponse) => { + if (chrome.runtime.lastError) { + resolve({ ok: false, error: chrome.runtime.lastError.message ?? 'Unknown error' }); + return; + } + resolve(response ?? { ok: false, error: 'No response from service worker' }); + }); + }); +} + +btnPairLocal.addEventListener('click', async () => { + btnPairLocal.disabled = true; + setLocalStatus('Pairing…', 'neutral'); + // Delegate to the service worker so the native-messaging bootstrap + // survives the popup teardown race — see the `self-hosted-pair` + // handler in worker.ts, and the matching cloud-auth-sign-in pattern. + const response = await requestLocalPair(); + if (response.ok && response.token) { + setLocalStatus(formatLocalTokenStatus(response.token), 'paired'); + } else { + setLocalStatus(`Pairing failed: ${response.error ?? 'Unknown error'}`, 'error'); + } + btnPairLocal.disabled = false; +}); + +refreshLocalStatus(); + +// ── Cloud sign-in (new in Phase 2 PR 8) ──────────────────────────── +// +// This is a skeleton: the token is persisted but not yet used on any +// WebSocket. A later PR will plumb it through the relay connection so +// cloud-hosted users can connect to the Vellum gateway without running +// a local daemon. 
+ +function setCloudStatus(text: string, signedIn: boolean): void { + cloudStatus.textContent = text; + cloudStatus.classList.toggle('signed-in', signedIn); +} + +async function refreshCloudStatus(): Promise { + try { + const existing = await getStoredToken(); + if (existing) { + setCloudStatus(`Signed in as guardian:${existing.guardianId}`, true); + } else { + setCloudStatus('Not signed in', false); + } + } catch (err) { + setCloudStatus(`Error: ${err instanceof Error ? err.message : String(err)}`, false); + } +} + +interface CloudSignInResponse { + ok: boolean; + token?: StoredCloudToken; + error?: string; +} + +function requestCloudSignIn(): Promise { + return new Promise((resolve) => { + chrome.runtime.sendMessage({ type: 'cloud-auth-sign-in' }, (response: CloudSignInResponse) => { + if (chrome.runtime.lastError) { + resolve({ ok: false, error: chrome.runtime.lastError.message ?? 'Unknown error' }); + return; + } + resolve(response ?? { ok: false, error: 'No response from service worker' }); + }); + }); +} + +btnCloudSignIn.addEventListener('click', async () => { + btnCloudSignIn.disabled = true; + setCloudStatus('Signing in…', false); + // Delegate to the service worker — see header comment for the rationale. + const response = await requestCloudSignIn(); + if (response.ok && response.token) { + setCloudStatus(`Signed in as guardian:${response.token.guardianId}`, true); + } else { + setCloudStatus(`Sign-in failed: ${response.error ?? 'Unknown error'}`, false); + } + btnCloudSignIn.disabled = false; +}); + +refreshCloudStatus(); + +// ── CDP proxy beta toggle (Phase 2 PR 9) ────────────────────────── +// +// Persists `vellum.cdpProxyEnabled` in chrome.storage.local. The service +// worker reads this flag at startup and listens for changes via +// chrome.storage.onChanged, so no reconnect is needed — flipping the +// checkbox takes effect on the next incoming host_browser_request frame. 
+ +chrome.storage.local.get(CDP_PROXY_ENABLED_KEY).then((result) => { + cdpProxyToggle.checked = result[CDP_PROXY_ENABLED_KEY] === true; +}); + +cdpProxyToggle.addEventListener('change', async () => { + await chrome.storage.local.set({ [CDP_PROXY_ENABLED_KEY]: cdpProxyToggle.checked }); +}); + +// ── Relay mode switcher (Phase 2 PR 14) ──────────────────────────── +// +// Flips `vellum.relayMode` in chrome.storage.local between "self-hosted" +// (default, back-compat) and "cloud". The service worker listens for +// storage changes via chrome.storage.onChanged and closes the current +// socket + reopens a new one against the selected transport. + +function isRelayModeKind(v: unknown): v is RelayModeKind { + return v === 'self-hosted' || v === 'cloud'; +} + +chrome.storage.local.get(RELAY_MODE_KEY).then((result) => { + const stored = result[RELAY_MODE_KEY]; + const mode: RelayModeKind = isRelayModeKind(stored) ? stored : 'self-hosted'; + if (mode === 'cloud') { + modeCloud.checked = true; + } else { + modeSelfHosted.checked = true; + } +}); + +async function handleModeChange(newMode: RelayModeKind): Promise { + await chrome.storage.local.set({ [RELAY_MODE_KEY]: newMode }); + // The service worker reacts to the storage change via + // chrome.storage.onChanged — we don't need to send an explicit + // disconnect/connect message here. 
+} + +modeSelfHosted.addEventListener('change', () => { + if (modeSelfHosted.checked) { + void handleModeChange('self-hosted'); + } +}); + +modeCloud.addEventListener('change', () => { + if (modeCloud.checked) { + void handleModeChange('cloud'); + } +}); diff --git a/clients/chrome-extension/tsconfig.json b/clients/chrome-extension/tsconfig.json new file mode 100644 index 00000000000..300f3cb23e7 --- /dev/null +++ b/clients/chrome-extension/tsconfig.json @@ -0,0 +1,23 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "NodeNext", + "moduleResolution": "NodeNext", + "lib": ["ES2022", "DOM"], + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "noEmit": true, + "allowJs": false, + "isolatedModules": true, + "types": [] + }, + "include": [ + "background/**/*.ts", + "popup/**/*.ts", + "types/**/*.d.ts" + ], + "exclude": ["dist/**", "node_modules/**"] +} diff --git a/clients/chrome-extension/types/bun-test-shim.d.ts b/clients/chrome-extension/types/bun-test-shim.d.ts new file mode 100644 index 00000000000..0b0277867d3 --- /dev/null +++ b/clients/chrome-extension/types/bun-test-shim.d.ts @@ -0,0 +1,58 @@ +/** + * Minimal shim for the `bun:test` module so the chrome-extension tests + * can be type-checked without depending on bun-types being installed in + * the chrome-extension's own node_modules. The full bun-types package is + * available in assistant/node_modules and is the runtime source of truth; + * this shim only declares the surface used by the extension's unit tests. + */ + +declare module 'bun:test' { + type TestCallback = () => void | Promise; + + /** Runnable test: body is required. Applies to test(), test.skip(), test.only(). */ + type RunnableTestFn = (name: string, fn: TestCallback) => void; + + /** Permissive variant for test.todo — a body is optional because + * a todo can declare intent to write a test in the future. 
*/ + type TodoTestFn = (name: string, fn?: TestCallback) => void; + + interface TestApi extends RunnableTestFn { + /** Mark a test as a TODO — reports as 'todo' rather than 'passed'. + * Use for planned tests that haven't been written yet. */ + todo: TodoTestFn; + /** Skip a test temporarily. Use sparingly; prefer removing or fixing. */ + skip: RunnableTestFn; + /** Run only this test. Do not commit .only() calls. */ + only: RunnableTestFn; + } + + interface DescribeApi { + (name: string, fn: () => void): void; + skip(name: string, fn: () => void): void; + only(name: string, fn: () => void): void; + } + + export const test: TestApi; + export const describe: DescribeApi; + export function beforeEach(fn: TestCallback): void; + export function afterEach(fn: TestCallback): void; + export function beforeAll(fn: TestCallback): void; + export function afterAll(fn: TestCallback): void; + + interface Matchers { + toBe(expected: unknown): R; + toEqual(expected: unknown): R; + toBeNull(): R; + toBeUndefined(): R; + toBeGreaterThanOrEqual(expected: number): R; + toBeLessThanOrEqual(expected: number): R; + toContain(expected: unknown): R; + not: Matchers; + rejects: { + toThrow(expected?: string | RegExp | Error): Promise; + }; + toThrow(expected?: string | RegExp | Error): R; + } + + export function expect(actual: T): Matchers; +} diff --git a/clients/chrome-extension/types/chrome-globals.d.ts b/clients/chrome-extension/types/chrome-globals.d.ts new file mode 100644 index 00000000000..d9fd7c29cfe --- /dev/null +++ b/clients/chrome-extension/types/chrome-globals.d.ts @@ -0,0 +1,304 @@ +/// + +/** + * Minimal ambient declarations for the subset of the Chrome Extension API + * surface used by the Vellum browser-relay extension's typed modules. + * + * This is intentionally narrow — it covers what's needed by the + * typechecked files under `background/` and `popup/`, not the full + * Chrome API surface. 
The full `@types/chrome` package is an option for + * the future if we type-check more of the package or need additional + * API surface that this file doesn't cover. + * + * Note: `debugger` is a reserved word in TypeScript so we cannot declare + * a `namespace chrome.debugger`. Instead, `chrome` is declared as a + * top-level `const` whose type is an interface — that shape can include + * a `debugger` property because object literal property names may use + * reserved words. + */ + +interface ChromeStorageArea { + get(keys?: string | string[] | Record | null): Promise>; + set(items: Record): Promise; + remove(keys: string | string[]): Promise; + clear(): Promise; +} + +interface ChromeStorageChange { + newValue?: unknown; + oldValue?: unknown; +} + +type ChromeStorageAreaName = 'local' | 'sync' | 'managed' | 'session'; + +interface ChromeStorageChangedEvent { + addListener( + listener: ( + changes: Record, + areaName: ChromeStorageAreaName, + ) => void, + ): void; + removeListener( + listener: ( + changes: Record, + areaName: ChromeStorageAreaName, + ) => void, + ): void; +} + +interface ChromeStorageNamespace { + local: ChromeStorageArea; + sync: ChromeStorageArea; + session: ChromeStorageArea; + onChanged: ChromeStorageChangedEvent; +} + +interface ChromeIdentityWebAuthFlowDetails { + url: string; + interactive?: boolean; +} + +interface ChromeIdentityNamespace { + getRedirectURL(path?: string): string; + launchWebAuthFlow(details: ChromeIdentityWebAuthFlowDetails): Promise; +} + +interface ChromeRuntimeLastError { + message?: string; +} + +interface ChromeRuntimePortMessageEvent { + addListener(listener: (message: unknown) => void): void; + removeListener(listener: (message: unknown) => void): void; +} + +interface ChromeRuntimePortDisconnectEvent { + addListener(listener: (port: ChromeRuntimePort) => void): void; + removeListener(listener: (port: ChromeRuntimePort) => void): void; +} + +interface ChromeRuntimePort { + name: string; + onMessage: 
ChromeRuntimePortMessageEvent; + onDisconnect: ChromeRuntimePortDisconnectEvent; + postMessage(message: unknown): void; + disconnect(): void; +} + +interface ChromeRuntimeMessageSender { + tab?: ChromeTab; + frameId?: number; + id?: string; + url?: string; + tlsChannelId?: string; + origin?: string; +} + +type ChromeRuntimeMessageListener = ( + message: Record & { type?: string }, + sender: ChromeRuntimeMessageSender, + sendResponse: (response?: unknown) => void, +) => boolean | void; + +interface ChromeRuntimeOnMessageEvent { + addListener(listener: ChromeRuntimeMessageListener): void; + removeListener(listener: ChromeRuntimeMessageListener): void; +} + +interface ChromeRuntimeManifest { + version: string; + [key: string]: unknown; +} + +interface ChromeRuntimeNamespace { + readonly lastError: ChromeRuntimeLastError | undefined; + connectNative(application: string): ChromeRuntimePort; + onMessage: ChromeRuntimeOnMessageEvent; + // Generic over the response type so callers can narrow the callback + // argument without casting. Matches the de-facto shape used by the + // official @types/chrome package. 
+ sendMessage( + message: unknown, + responseCallback?: (response: TResponse) => void, + ): void; + getManifest(): ChromeRuntimeManifest; +} + +interface ChromeTab { + id?: number; + windowId?: number; + url?: string; + active?: boolean; + title?: string; + index?: number; +} + +interface ChromeTabsQueryInfo { + active?: boolean; + lastFocusedWindow?: boolean; + url?: string | string[]; + windowId?: number; + currentWindow?: boolean; + [key: string]: unknown; +} + +interface ChromeTabsCreateProperties { + url?: string; + active?: boolean; + windowId?: number; + index?: number; +} + +interface ChromeTabsUpdateProperties { + url?: string; + active?: boolean; + [key: string]: unknown; +} + +interface ChromeTabsCaptureVisibleTabOptions { + format?: 'jpeg' | 'png'; + quality?: number; +} + +interface ChromeTabsNamespace { + query(queryInfo: ChromeTabsQueryInfo): Promise; + get(tabId: number): Promise; + create(createProperties: ChromeTabsCreateProperties): Promise; + update(tabId: number, updateProperties: ChromeTabsUpdateProperties): Promise; + captureVisibleTab( + windowId: number, + options?: ChromeTabsCaptureVisibleTabOptions, + ): Promise; +} + +interface ChromeWindowsNamespace { + readonly WINDOW_ID_CURRENT: number; + readonly WINDOW_ID_NONE: number; +} + +interface ChromeCookie { + name: string; + value: string; + domain: string; + hostOnly?: boolean; + path: string; + secure: boolean; + httpOnly: boolean; + sameSite?: 'no_restriction' | 'lax' | 'strict' | 'unspecified'; + session?: boolean; + expirationDate?: number; + storeId?: string; +} + +interface ChromeCookiesGetAllDetails { + domain?: string; + name?: string; + path?: string; + secure?: boolean; + session?: boolean; + storeId?: string; + url?: string; +} + +interface ChromeCookiesSetDetails { + url: string; + name?: string; + value?: string; + domain?: string; + path?: string; + secure?: boolean; + httpOnly?: boolean; + sameSite?: 'no_restriction' | 'lax' | 'strict' | 'unspecified'; + expirationDate?: 
number; + storeId?: string; +} + +interface ChromeCookiesNamespace { + getAll(details: ChromeCookiesGetAllDetails): Promise; + set(details: ChromeCookiesSetDetails): Promise; +} + +interface ChromeDebuggerDebuggee { + tabId?: number; + extensionId?: string; + targetId?: string; +} + +/** + * Chrome 125+ flat-session target. Extends `Debuggee` with an optional + * `sessionId` that addresses a child flat session created via + * `Target.attachToTarget` with `flatten: true`. The `chrome.debugger` + * sendCommand API and the `onEvent` `source` argument both accept this + * shape so child sessions can be routed via the target argument rather + * than smuggled into command params. + */ +interface ChromeDebuggerSession extends ChromeDebuggerDebuggee { + sessionId?: string; +} + +interface ChromeDebuggerOnEventEvent { + addListener( + callback: ( + source: ChromeDebuggerSession, + method: string, + params?: unknown, + ) => void, + ): void; + removeListener( + callback: ( + source: ChromeDebuggerSession, + method: string, + params?: unknown, + ) => void, + ): void; +} + +interface ChromeDebuggerOnDetachEvent { + addListener( + callback: (source: ChromeDebuggerDebuggee, reason: string) => void, + ): void; + removeListener( + callback: (source: ChromeDebuggerDebuggee, reason: string) => void, + ): void; +} + +interface ChromeDebuggerNamespace { + // Promise-style (modern MV3 usage — used by worker.ts). + attach(target: ChromeDebuggerDebuggee, requiredVersion: string): Promise; + detach(target: ChromeDebuggerDebuggee): Promise; + sendCommand( + target: ChromeDebuggerSession, + method: string, + commandParams?: Record, + ): Promise; + // Callback-style overloads (still supported in MV3). cdp-proxy.ts uses the + // callback form so it can thread errors through `chrome.runtime.lastError` + // on a per-call basis, which is what makes the injected `ChromeDebuggerApi` + // testable against a mock. 
+ attach( + target: ChromeDebuggerDebuggee, + requiredVersion: string, + callback: () => void, + ): void; + detach(target: ChromeDebuggerDebuggee, callback: () => void): void; + sendCommand( + target: ChromeDebuggerSession, + method: string, + commandParams: Record | undefined, + callback: (result?: unknown) => void, + ): void; + onEvent: ChromeDebuggerOnEventEvent; + onDetach: ChromeDebuggerOnDetachEvent; +} + +interface ChromeGlobal { + storage: ChromeStorageNamespace; + identity: ChromeIdentityNamespace; + runtime: ChromeRuntimeNamespace; + tabs: ChromeTabsNamespace; + windows: ChromeWindowsNamespace; + cookies: ChromeCookiesNamespace; + debugger: ChromeDebuggerNamespace; +} + +declare const chrome: ChromeGlobal; diff --git a/clients/macos/build.sh b/clients/macos/build.sh index a56d72c63fa..8b37bc6c683 100755 --- a/clients/macos/build.sh +++ b/clients/macos/build.sh @@ -213,6 +213,7 @@ export SIGN_IDENTITY ASSISTANT_SRC_DIR="$SCRIPT_DIR/../../assistant" CLI_SRC_DIR="$SCRIPT_DIR/../../cli" GATEWAY_SRC_DIR="$SCRIPT_DIR/../../gateway" +NATIVE_HOST_SRC_DIR="$SCRIPT_DIR/../chrome-extension-native-host" # Packages that must stay external in compiled Bun binaries. # playwright-core has optional requires (electron, chromium-bidi) that cannot @@ -278,6 +279,9 @@ build_binaries() { (cd "$ASSISTANT_SRC_DIR" && bun install --frozen-lockfile 2>/dev/null || bun install) (cd "$CLI_SRC_DIR" && bun install --frozen-lockfile 2>/dev/null || bun install) (cd "$GATEWAY_SRC_DIR" && bun install --frozen-lockfile 2>/dev/null || bun install) + if [ -d "$NATIVE_HOST_SRC_DIR/src" ]; then + (cd "$NATIVE_HOST_SRC_DIR" && bun install --frozen-lockfile 2>/dev/null || bun install) + fi # Shared flags for daemon and assistant CLI local daemon_flags=("${BUN_EXTERNAL_FLAGS[@]}") @@ -318,6 +322,12 @@ build_binaries() { "$SCRIPT_DIR/gateway-bin" "vellum-gateway" & pids+=($!) 
+ if [ -d "$NATIVE_HOST_SRC_DIR/src" ]; then + SKIP_BUN_INSTALL=1 build_bun_binary "$NATIVE_HOST_SRC_DIR" "$NATIVE_HOST_SRC_DIR/src/index.ts" \ + "$SCRIPT_DIR/native-host-bin" "vellum-chrome-native-host" & + pids+=($!) + fi + for pid in "${pids[@]}"; do wait "$pid" || failures=$((failures + 1)) done @@ -388,7 +398,7 @@ case "$CMD" in clean) echo "Cleaning..." rm -rf "$SCRIPT_DIR/dist" "$SCRIPT_DIR/../.build" - rm -rf "$SCRIPT_DIR/daemon-bin" "$SCRIPT_DIR/assistant-bin" "$SCRIPT_DIR/cli-bin" "$SCRIPT_DIR/gateway-bin" + rm -rf "$SCRIPT_DIR/daemon-bin" "$SCRIPT_DIR/assistant-bin" "$SCRIPT_DIR/cli-bin" "$SCRIPT_DIR/gateway-bin" "$SCRIPT_DIR/native-host-bin" rm -rf "$SPM_MODULE_CACHE" echo "Done." exit 0 @@ -433,7 +443,7 @@ if [ "$CMD" = "release" ] || [ "$CMD" = "release-application" ]; then # (e.g. arm64 binaries from a previous build being bundled into an x86_64 release). # Skip when SKIP_BUN_REBUILD=1, since pre-built binaries are intentionally provided. if [ "${SKIP_BUN_REBUILD:-}" != "1" ]; then - rm -rf "$SCRIPT_DIR/daemon-bin" "$SCRIPT_DIR/assistant-bin" "$SCRIPT_DIR/cli-bin" "$SCRIPT_DIR/gateway-bin" + rm -rf "$SCRIPT_DIR/daemon-bin" "$SCRIPT_DIR/assistant-bin" "$SCRIPT_DIR/cli-bin" "$SCRIPT_DIR/gateway-bin" "$SCRIPT_DIR/native-host-bin" fi fi fi @@ -620,6 +630,35 @@ if [ -f "$SCRIPT_DIR/gateway-bin/vellum-gateway" ]; then fi fi +# Auto-build Chrome native messaging helper binary if missing or stale +# and bun is available. This is the binary Chrome spawns via +# chrome.runtime.connectNative("com.vellum.daemon") — see +# clients/chrome-extension-native-host/ for the source and +# clients/macos/vellum-assistant/Features/Installer/NativeMessagingInstaller.swift +# for the manifest that points at the bundled copy. +NATIVE_HOST_BIN_NEEDS_BUILD=false +if [ "${SKIP_BUN_REBUILD:-}" != "1" ] && [ -d "$NATIVE_HOST_SRC_DIR/src" ] && command -v bun &>/dev/null; then + if [ ! 
-f "$SCRIPT_DIR/native-host-bin/vellum-chrome-native-host" ]; then + NATIVE_HOST_BIN_NEEDS_BUILD=true + elif [ -n "$(find "$NATIVE_HOST_SRC_DIR/src" -name '*.ts' -newer "$SCRIPT_DIR/native-host-bin/vellum-chrome-native-host" -print -quit 2>/dev/null)" ]; then + NATIVE_HOST_BIN_NEEDS_BUILD=true + elif [ "$NATIVE_HOST_SRC_DIR/package.json" -nt "$SCRIPT_DIR/native-host-bin/vellum-chrome-native-host" ] || \ + { [ -f "$NATIVE_HOST_SRC_DIR/bun.lock" ] && [ "$NATIVE_HOST_SRC_DIR/bun.lock" -nt "$SCRIPT_DIR/native-host-bin/vellum-chrome-native-host" ]; }; then + NATIVE_HOST_BIN_NEEDS_BUILD=true + fi +fi +if [ "$NATIVE_HOST_BIN_NEEDS_BUILD" = true ]; then + build_bun_binary "$NATIVE_HOST_SRC_DIR" "$NATIVE_HOST_SRC_DIR/src/index.ts" \ + "$SCRIPT_DIR/native-host-bin" "vellum-chrome-native-host" +fi + +# Also rebuild if native host binary changed or newly added +if [ -f "$SCRIPT_DIR/native-host-bin/vellum-chrome-native-host" ]; then + if [ ! -f "$MACOS_DIR/vellum-chrome-native-host" ] || [ "$SCRIPT_DIR/native-host-bin/vellum-chrome-native-host" -nt "$MACOS_DIR/vellum-chrome-native-host" ]; then + NEEDS_REBUILD=true + fi +fi + # Ensure .app bundle structure exists mkdir -p "$MACOS_DIR" "$RESOURCES_DIR" "$FRAMEWORKS_DIR" @@ -676,6 +715,19 @@ if [ "$NEEDS_REBUILD" = true ]; then echo "No gateway binary at $GATEWAY_BIN — skipping (dev mode)" fi + # Copy bundled Chrome native messaging helper binary (if available). + # This is an auxiliary executable under Contents/MacOS/ that Chrome + # spawns via the com.vellum.daemon.json manifest written by + # NativeMessagingInstaller at first launch. + NATIVE_HOST_BIN="$SCRIPT_DIR/native-host-bin/vellum-chrome-native-host" + if [ -f "$NATIVE_HOST_BIN" ]; then + echo "Bundling Chrome native messaging helper binary..." 
+ cp "$NATIVE_HOST_BIN" "$MACOS_DIR/vellum-chrome-native-host" + chmod +x "$MACOS_DIR/vellum-chrome-native-host" + else + echo "No Chrome native messaging helper binary at $NATIVE_HOST_BIN — skipping (dev mode)" + fi + else echo "Binaries unchanged, skipping binary repackaging" fi @@ -1124,6 +1176,16 @@ if [ -f "$MACOS_DIR/vellum-gateway" ]; then echo "Gateway binary signed" fi +# Sign Chrome native messaging helper binary +if [ -f "$MACOS_DIR/vellum-chrome-native-host" ]; then + NATIVE_HOST_SIGN_FLAGS=(--force --sign "$SIGN_IDENTITY") + if [ "$CONFIG" = "release" ] && [ "$SIGN_IDENTITY" != "-" ]; then + NATIVE_HOST_SIGN_FLAGS+=(--timestamp --options runtime) + fi + codesign "${NATIVE_HOST_SIGN_FLAGS[@]}" "$MACOS_DIR/vellum-chrome-native-host" + echo "Chrome native messaging helper binary signed" +fi + # Embedding runtime node_modules are no longer bundled (downloaded post-hatch). # Sign any additional regular files directly under Contents/MacOS. @@ -1138,6 +1200,7 @@ if [ -d "$MACOS_DIR" ]; then ! -name "vellum-daemon" \ ! -name "vellum-cli" \ ! -name "vellum-gateway" \ + ! -name "vellum-chrome-native-host" \ -exec codesign "${EXTRA_FILE_SIGN_FLAGS[@]}" {} \; fi diff --git a/clients/macos/vellum-assistant/App/AppDelegate+NativeMessaging.swift b/clients/macos/vellum-assistant/App/AppDelegate+NativeMessaging.swift new file mode 100644 index 00000000000..c2ff3ca4987 --- /dev/null +++ b/clients/macos/vellum-assistant/App/AppDelegate+NativeMessaging.swift @@ -0,0 +1,110 @@ +import AppKit +import Foundation +import VellumAssistantShared +import os + +private let log = Logger(subsystem: Bundle.appBundleIdentifier, category: "AppDelegate+NativeMessaging") + +/// Install-time hook for the Chrome native messaging host manifest. 
+/// +/// See `NativeMessagingInstaller` and +/// `clients/chrome-extension-native-host/` for the full flow: +/// this extension is responsible for (1) locating the bundled +/// `vellum-chrome-native-host` helper binary inside the `.app` +/// bundle at launch time, and (2) writing the +/// `com.vellum.daemon.json` manifest into Chrome's well-known +/// `~/Library/Application Support/Google/Chrome/NativeMessagingHosts/` +/// directory so Chrome will spawn that helper when the Vellum +/// extension calls `chrome.runtime.connectNative("com.vellum.daemon")`. +/// +/// This runs off the main thread from `applicationDidFinishLaunching` +/// because writing to `~/Library` involves disk I/O and we do not +/// want to block app launch if the directory has unusual permissions. +/// It also runs unconditionally on every launch (cheap, idempotent) +/// so that upgrading the app bundle automatically repoints the +/// manifest at the newer helper binary path. +extension AppDelegate { + + /// Installs the Chrome native messaging host manifest for the + /// Vellum chrome extension. Idempotent — safe to call on every + /// launch. Runs off the main thread. + /// + /// This method is deliberately non-`@MainActor`: it touches no + /// app state, does pure file I/O under `~/Library`, and follows + /// the same off-main-thread pattern as `installCLISymlinkIfNeeded`. + nonisolated static func installChromeNativeMessagingHostIfNeeded() { + guard let helperBinaryUrl = resolveBundledNativeMessagingHelper() else { + // Normal for dev builds where the helper binary hasn't + // been built yet (see `clients/chrome-extension-native-host` + // and the build.sh wiring). Not an error — the self-hosted + // Chrome extension pairing flow (PR 13) is optional, and + // everything else in the assistant continues to work. 
+ log.info("vellum-chrome-native-host helper not bundled — skipping Chrome manifest install (dev build?)") + return + } + + do { + try NativeMessagingInstaller.installChromeManifest( + helperBinaryPath: helperBinaryUrl, + extensionId: ChromeExtensionAllowlist.devPlaceholderId + ) + } catch { + // Best-effort: a failing manifest install must not crash + // the app. Log at warning so it shows up in diagnostics + // but does not spam the error channel. + log.warning( + "Failed to install Chrome native messaging manifest: \(error.localizedDescription, privacy: .public)" + ) + } + } + + /// Resolves the absolute URL of the bundled + /// `vellum-chrome-native-host` helper binary inside the running + /// app bundle, or `nil` if it is not present (dev builds that + /// haven't run the helper's `bun run build` yet). + /// + /// Tries `Bundle.main.url(forAuxiliaryExecutable:)` first — which + /// is the Apple-recommended way to look up secondary binaries + /// inside `Contents/MacOS/` — and falls back to a direct path + /// computation for builds that package the binary at a + /// non-standard location. + nonisolated static func resolveBundledNativeMessagingHelper() -> URL? { + let binaryName = "vellum-chrome-native-host" + + if let url = Bundle.main.url(forAuxiliaryExecutable: binaryName) { + return url + } + + // Fallback: compute the path directly against the bundle's + // executable URL. This matches how `installCLISymlinkIfNeeded` + // discovers the `vellum-cli` sibling binary. + if let execURL = Bundle.main.executableURL { + let candidate = execURL + .deletingLastPathComponent() + .appendingPathComponent(binaryName) + if FileManager.default.fileExists(atPath: candidate.path) { + return candidate + } + } + + return nil + } +} + +/// Hard-coded allowlist of Chrome extension IDs the installer pins +/// into the manifest's `allowed_origins`. 
Must stay in lockstep with +/// `ALLOWED_EXTENSION_IDS` in +/// `clients/chrome-extension-native-host/src/index.ts` (PR 7) and the +/// allowlist the assistant's `/v1/browser-extension-pair` endpoint +/// checks (PR 11). +/// +/// Kept in a standalone enum so unit tests can reference it without +/// instantiating `AppDelegate`. +enum ChromeExtensionAllowlist { + /// Dev placeholder id. Matches the single entry currently present + /// in the helper binary's allowlist in PR 7. Replaced before + /// release with the production extension id — see the + /// `TODO: production id before release` comment in + /// `clients/chrome-extension-native-host/src/index.ts`. + static let devPlaceholderId = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +} diff --git a/clients/macos/vellum-assistant/App/AppDelegate.swift b/clients/macos/vellum-assistant/App/AppDelegate.swift index 373ef401905..145b36b3305 100644 --- a/clients/macos/vellum-assistant/App/AppDelegate.swift +++ b/clients/macos/vellum-assistant/App/AppDelegate.swift @@ -527,6 +527,16 @@ public final class AppDelegate: NSObject, NSApplicationDelegate { Self.installCLISymlinkIfNeeded(isDevMode: isDevMode) } + // Install the Chrome native messaging host manifest so the + // Vellum Chrome extension can spawn the bundled helper binary + // via `chrome.runtime.connectNative("com.vellum.daemon")`. + // Best-effort and idempotent — see + // `AppDelegate+NativeMessaging.swift` for details. Runs off + // the main thread because it touches `~/Library` on disk. 
+ Task.detached(priority: .utility) { + Self.installChromeNativeMessagingHostIfNeeded() + } + let hasAssistants = lockfileHasAssistants() log.info("[appLaunch] skipOnboarding=\(skipOnboarding) hasAssistants=\(hasAssistants)") diff --git a/clients/macos/vellum-assistant/Features/Installer/NativeMessagingInstaller.swift b/clients/macos/vellum-assistant/Features/Installer/NativeMessagingInstaller.swift new file mode 100644 index 00000000000..5b204826cdf --- /dev/null +++ b/clients/macos/vellum-assistant/Features/Installer/NativeMessagingInstaller.swift @@ -0,0 +1,182 @@ +import Foundation +import os + +private let log = Logger(subsystem: Bundle.appBundleIdentifier, category: "NativeMessagingInstaller") + +/// Installs and removes the Chrome Native Messaging host manifest that +/// points at the bundled `vellum-chrome-native-host` helper binary. +/// +/// Chrome looks for per-user native messaging host manifests in +/// `~/Library/Application Support/Google/Chrome/NativeMessagingHosts/`. +/// When the Vellum Chrome extension calls +/// `chrome.runtime.connectNative("com.vellum.daemon")`, Chrome reads +/// `com.vellum.daemon.json` from that directory, verifies that the +/// calling extension's ID is on the manifest's `allowed_origins` +/// list, and spawns the binary referenced by `path`. +/// +/// See `clients/chrome-extension-native-host/` (PR 7) for the helper +/// binary and `clients/chrome-extension-native-host/com.vellum.daemon.json.template` +/// for the shape of the manifest this installer writes. +/// +/// This helper intentionally carries **no** app state so it can run +/// safely off the main thread from `applicationDidFinishLaunching` +/// without `@MainActor` isolation, matching the pattern used by +/// `AppDelegate.installCLISymlinkIfNeeded(isDevMode:)`. +public enum NativeMessagingInstaller { + + /// Canonical Chrome native messaging host name for the Vellum helper. 
+ /// Intentionally left as `com.vellum.daemon` because it is a technical + /// identifier baked into Chrome's manifest lookup — not user-facing — + /// and must match `chrome.runtime.connectNative("com.vellum.daemon")` + /// in the extension (see PR 13). + public static let hostName = "com.vellum.daemon" + + /// Human-readable description written into the manifest's + /// `description` field. Per `clients/AGENTS.md` the user-facing + /// wording prefers "assistant" over "daemon". + public static let hostDescription = "Vellum assistant native messaging host" + + /// Errors surfaced by the installer. Rendered to the app log rather + /// than bubbled to the UI — the assistant continues to run even if + /// the manifest install fails, and the Chrome extension's + /// self-hosted pairing flow (PR 13) will simply not work until it + /// is resolved. + /// + /// Conforms to `LocalizedError` (rather than only + /// `CustomStringConvertible`) so that `error.localizedDescription` + /// returns the human-readable string below instead of Foundation's + /// generic "The operation couldn't be completed (… error 0.)" + /// fallback. This matches the convention used by other error types + /// in this app (see `RecorderError`, `CaptureError`, + /// `ExecutorError`, etc.). + public enum InstallError: Error, LocalizedError { + case helperBinaryMissing(URL) + + public var errorDescription: String? { + switch self { + case .helperBinaryMissing(let url): + return "Native messaging helper binary not found at \(url.path)" + } + } + } + + // MARK: - Public API + + /// Writes the `com.vellum.daemon.json` manifest under the current + /// user's Chrome native messaging hosts directory. Overwrites any + /// existing manifest so upgrades cleanly repoint at the new helper + /// binary path. + /// + /// - Parameters: + /// - helperBinaryPath: Absolute path to the bundled native + /// messaging helper binary (e.g. + /// `…/Contents/MacOS/vellum-chrome-native-host`). 
Must exist — + /// Chrome refuses to spawn a host whose `path` is missing. + /// - extensionId: The Chrome extension ID to pin in + /// `allowed_origins`. Must match the allowlist enforced by the + /// helper binary itself (PR 7 `ALLOWED_EXTENSION_IDS`) and the + /// runtime pair endpoint's allowlist (PR 11). + public static func installChromeManifest( + helperBinaryPath: URL, + extensionId: String + ) throws { + try installChromeManifest( + helperBinaryPath: helperBinaryPath, + extensionId: extensionId, + homeDirectory: FileManager.default.homeDirectoryForCurrentUser, + fileManager: FileManager.default + ) + } + + /// Removes the `com.vellum.daemon.json` manifest if present. Safe to + /// call when the manifest does not exist — returns without error. + public static func uninstallChromeManifest() throws { + try uninstallChromeManifest( + homeDirectory: FileManager.default.homeDirectoryForCurrentUser, + fileManager: FileManager.default + ) + } + + // MARK: - Testable overloads + + /// Test-only overload that allows injecting a mock home directory so + /// the installer can be exercised without touching the real Chrome + /// directory under the tester's home folder. + internal static func installChromeManifest( + helperBinaryPath: URL, + extensionId: String, + homeDirectory: URL, + fileManager: FileManager + ) throws { + guard fileManager.fileExists(atPath: helperBinaryPath.path) else { + throw InstallError.helperBinaryMissing(helperBinaryPath) + } + + let targetDir = manifestDirectory(under: homeDirectory) + try fileManager.createDirectory( + at: targetDir, + withIntermediateDirectories: true, + attributes: nil + ) + + let manifestUrl = targetDir.appendingPathComponent("\(hostName).json") + + // JSONSerialization is used (rather than a Codable struct) so the + // field order matches the Chrome-expected shape and so the + // structure lines up 1:1 with the .template file checked into + // the chrome-extension-native-host package. 
+ // + // Swift dictionaries are unordered, but Chrome doesn't care about + // field order — it just parses the object — so we prioritize + // clarity here. + let manifest: [String: Any] = [ + "name": hostName, + "description": hostDescription, + "path": helperBinaryPath.path, + "type": "stdio", + "allowed_origins": ["chrome-extension://\(extensionId)/"], + ] + + let data = try JSONSerialization.data( + withJSONObject: manifest, + options: [.prettyPrinted, .sortedKeys] + ) + try data.write(to: manifestUrl, options: .atomic) + + // Chrome requires the manifest to be readable by the user; 0o644 + // is what Google's own documentation for macOS native messaging + // host manifests uses and it matches the DMG/installer patterns + // used elsewhere in this app. + try fileManager.setAttributes( + [.posixPermissions: NSNumber(value: 0o644)], + ofItemAtPath: manifestUrl.path + ) + + log.info("Installed Chrome native messaging manifest at \(manifestUrl.path, privacy: .public)") + } + + internal static func uninstallChromeManifest( + homeDirectory: URL, + fileManager: FileManager + ) throws { + let manifestUrl = manifestDirectory(under: homeDirectory) + .appendingPathComponent("\(hostName).json") + if fileManager.fileExists(atPath: manifestUrl.path) { + try fileManager.removeItem(at: manifestUrl) + log.info("Removed Chrome native messaging manifest at \(manifestUrl.path, privacy: .public)") + } + } + + /// Resolves the directory where Chrome looks up per-user native + /// messaging host manifests, given a home directory. Exposed so + /// tests can build the expected path without duplicating the + /// constant string. 
+ internal static func manifestDirectory(under homeDirectory: URL) -> URL { + homeDirectory + .appendingPathComponent("Library", isDirectory: true) + .appendingPathComponent("Application Support", isDirectory: true) + .appendingPathComponent("Google", isDirectory: true) + .appendingPathComponent("Chrome", isDirectory: true) + .appendingPathComponent("NativeMessagingHosts", isDirectory: true) + } +} diff --git a/clients/macos/vellum-assistantTests/NativeMessagingInstallerTests.swift b/clients/macos/vellum-assistantTests/NativeMessagingInstallerTests.swift new file mode 100644 index 00000000000..e207cf29254 --- /dev/null +++ b/clients/macos/vellum-assistantTests/NativeMessagingInstallerTests.swift @@ -0,0 +1,253 @@ +import Foundation +import XCTest +@testable import VellumAssistantLib + +/// Tests for `NativeMessagingInstaller` — the macOS install-time +/// helper that writes the Chrome native messaging host manifest +/// (`com.vellum.daemon.json`) into Chrome's well-known per-user +/// `NativeMessagingHosts/` directory. +/// +/// These tests use an injected mock `homeDirectory` so the installer +/// writes under a fresh `temporaryDirectory` rather than the real +/// tester's `~/Library/Application Support/Google/Chrome/`. The +/// production public entry points (`installChromeManifest(...)`, +/// `uninstallChromeManifest()`) use `FileManager.default`; the tests +/// exercise the internal testable overloads that accept both the +/// home directory and the file manager explicitly. +final class NativeMessagingInstallerTests: XCTestCase { + private var tempDir: URL! + private var mockHome: URL! + private var helperBinaryUrl: URL! + + override func setUp() { + super.setUp() + + // A fresh scratch root per test, isolated to the test bundle + // so parallel test runs can't collide. + tempDir = FileManager.default.temporaryDirectory + .appendingPathComponent("NativeMessagingInstallerTests-\(UUID().uuidString)", isDirectory: true) + try! 
FileManager.default.createDirectory(at: tempDir, withIntermediateDirectories: true) + + // Simulate ~/ under tempDir so the installer computes + // ~/Library/Application Support/Google/Chrome/NativeMessagingHosts + // relative to a controlled root. + mockHome = tempDir.appendingPathComponent("home", isDirectory: true) + try! FileManager.default.createDirectory(at: mockHome, withIntermediateDirectories: true) + + // Stand in for the bundled `vellum-chrome-native-host` binary. + // The installer only verifies existence via + // `fileExists(atPath:)`, so a placeholder file is sufficient. + helperBinaryUrl = tempDir.appendingPathComponent("vellum-chrome-native-host") + FileManager.default.createFile( + atPath: helperBinaryUrl.path, + contents: Data("#!/bin/sh\nexit 0\n".utf8), + attributes: [.posixPermissions: NSNumber(value: 0o755)] + ) + } + + override func tearDown() { + try? FileManager.default.removeItem(at: tempDir) + super.tearDown() + } + + // MARK: - install + + func testInstallWritesManifestWithExpectedStructure() throws { + try NativeMessagingInstaller.installChromeManifest( + helperBinaryPath: helperBinaryUrl, + extensionId: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + homeDirectory: mockHome, + fileManager: .default + ) + + let manifestUrl = NativeMessagingInstaller + .manifestDirectory(under: mockHome) + .appendingPathComponent("com.vellum.daemon.json") + + XCTAssertTrue( + FileManager.default.fileExists(atPath: manifestUrl.path), + "manifest should exist at expected path" + ) + + let data = try Data(contentsOf: manifestUrl) + let parsed = try XCTUnwrap( + try JSONSerialization.jsonObject(with: data) as? [String: Any] + ) + + XCTAssertEqual(parsed["name"] as? String, "com.vellum.daemon") + XCTAssertEqual(parsed["description"] as? String, "Vellum assistant native messaging host") + XCTAssertEqual(parsed["type"] as? String, "stdio") + XCTAssertEqual(parsed["path"] as? String, helperBinaryUrl.path) + + let origins = try XCTUnwrap(parsed["allowed_origins"] as? 
[String]) + XCTAssertEqual(origins, ["chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/"]) + } + + func testInstallSetsManifestPermissionsTo0o644() throws { + try NativeMessagingInstaller.installChromeManifest( + helperBinaryPath: helperBinaryUrl, + extensionId: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + homeDirectory: mockHome, + fileManager: .default + ) + + let manifestUrl = NativeMessagingInstaller + .manifestDirectory(under: mockHome) + .appendingPathComponent("com.vellum.daemon.json") + + let attrs = try FileManager.default.attributesOfItem(atPath: manifestUrl.path) + let perms = try XCTUnwrap(attrs[.posixPermissions] as? NSNumber) + XCTAssertEqual(perms.intValue, 0o644) + } + + func testInstallCreatesIntermediateNativeMessagingHostsDirectory() throws { + // Sanity: the mock home starts without a Chrome subtree. + let expectedDir = NativeMessagingInstaller.manifestDirectory(under: mockHome) + XCTAssertFalse( + FileManager.default.fileExists(atPath: expectedDir.path), + "precondition: NativeMessagingHosts directory should not yet exist" + ) + + try NativeMessagingInstaller.installChromeManifest( + helperBinaryPath: helperBinaryUrl, + extensionId: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + homeDirectory: mockHome, + fileManager: .default + ) + + var isDir: ObjCBool = false + XCTAssertTrue( + FileManager.default.fileExists(atPath: expectedDir.path, isDirectory: &isDir), + "NativeMessagingHosts directory should have been created" + ) + XCTAssertTrue(isDir.boolValue, "NativeMessagingHosts should be a directory") + } + + func testInstallOverwritesExistingManifest() throws { + // First install with a stale helper path/extension id. 
+ let staleBinary = tempDir.appendingPathComponent("stale-binary") + FileManager.default.createFile( + atPath: staleBinary.path, + contents: Data("old\n".utf8), + attributes: nil + ) + try NativeMessagingInstaller.installChromeManifest( + helperBinaryPath: staleBinary, + extensionId: "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + homeDirectory: mockHome, + fileManager: .default + ) + + // Re-install with the canonical helper binary and placeholder id. + try NativeMessagingInstaller.installChromeManifest( + helperBinaryPath: helperBinaryUrl, + extensionId: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + homeDirectory: mockHome, + fileManager: .default + ) + + let manifestUrl = NativeMessagingInstaller + .manifestDirectory(under: mockHome) + .appendingPathComponent("com.vellum.daemon.json") + let data = try Data(contentsOf: manifestUrl) + let parsed = try XCTUnwrap( + try JSONSerialization.jsonObject(with: data) as? [String: Any] + ) + + XCTAssertEqual( + parsed["path"] as? String, + helperBinaryUrl.path, + "second install should overwrite the stale path" + ) + XCTAssertEqual( + parsed["allowed_origins"] as? [String], + ["chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/"], + "second install should overwrite the stale allowed_origins" + ) + } + + func testInstallRejectsMissingHelperBinary() { + let missingBinary = tempDir.appendingPathComponent("does-not-exist") + + XCTAssertThrowsError( + try NativeMessagingInstaller.installChromeManifest( + helperBinaryPath: missingBinary, + extensionId: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + homeDirectory: mockHome, + fileManager: .default + ) + ) { error in + guard case NativeMessagingInstaller.InstallError.helperBinaryMissing(let url) = error else { + XCTFail("expected helperBinaryMissing, got \(error)") + return + } + XCTAssertEqual(url.path, missingBinary.path) + } + + // The installer must not leave behind a partial manifest when + // the helper is missing. 
+ let manifestUrl = NativeMessagingInstaller + .manifestDirectory(under: mockHome) + .appendingPathComponent("com.vellum.daemon.json") + XCTAssertFalse( + FileManager.default.fileExists(atPath: manifestUrl.path), + "manifest must not be written when helper is missing" + ) + } + + // MARK: - uninstall + + func testUninstallRemovesManifest() throws { + try NativeMessagingInstaller.installChromeManifest( + helperBinaryPath: helperBinaryUrl, + extensionId: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + homeDirectory: mockHome, + fileManager: .default + ) + + let manifestUrl = NativeMessagingInstaller + .manifestDirectory(under: mockHome) + .appendingPathComponent("com.vellum.daemon.json") + XCTAssertTrue(FileManager.default.fileExists(atPath: manifestUrl.path)) + + try NativeMessagingInstaller.uninstallChromeManifest( + homeDirectory: mockHome, + fileManager: .default + ) + + XCTAssertFalse( + FileManager.default.fileExists(atPath: manifestUrl.path), + "manifest should be removed after uninstall" + ) + } + + func testUninstallIsNoOpWhenManifestMissing() { + // Precondition: no install happened, so no manifest on disk. + let manifestUrl = NativeMessagingInstaller + .manifestDirectory(under: mockHome) + .appendingPathComponent("com.vellum.daemon.json") + XCTAssertFalse(FileManager.default.fileExists(atPath: manifestUrl.path)) + + XCTAssertNoThrow( + try NativeMessagingInstaller.uninstallChromeManifest( + homeDirectory: mockHome, + fileManager: .default + ) + ) + } + + // MARK: - manifestDirectory + + func testManifestDirectoryMatchesChromeExpectedLayout() { + let dir = NativeMessagingInstaller.manifestDirectory(under: mockHome) + let relative = dir.path.replacingOccurrences(of: mockHome.path, with: "") + + // Chrome's documented location for per-user native messaging + // host manifests on macOS. Any drift from this layout will + // break `chrome.runtime.connectNative("com.vellum.daemon")`. 
+ XCTAssertEqual( + relative, + "/Library/Application Support/Google/Chrome/NativeMessagingHosts" + ) + } +} diff --git a/clients/shared/Network/MessageTypes.swift b/clients/shared/Network/MessageTypes.swift index 1b27d4fd664..47945d4eeb9 100644 --- a/clients/shared/Network/MessageTypes.swift +++ b/clients/shared/Network/MessageTypes.swift @@ -42,6 +42,10 @@ import Foundation // │ │ code generator cannot express it │ // │ HostCuResultPayload │ Posted back to daemon; hand-maintained │ // │ │ alongside HostCuRequest │ +// │ HostBrowserRequest │ Uses AnyCodable for `cdpParams`; client │ +// │ │ decodes only to keep SSE healthy │ +// │ HostBrowserCancelRequest │ Hand-maintained alongside │ +// │ │ HostBrowserRequest │ // │ SkillSearchResult │ Client-only result wrapper for search; │ // │ │ not a wire type │ // │ SkillOperationResult │ Client-only result wrapper for skill │ @@ -1602,6 +1606,47 @@ public struct HostCuCancelRequest: Decodable, Sendable { public let requestId: String } +// MARK: - Host Browser Proxy + +/// Request from the daemon to execute a Chrome DevTools Protocol (CDP) command on +/// the host browser. The desktop client decodes this so the SSE stream does not +/// fail-closed; the actual CDP execution lives in the Chrome extension and is not +/// handled directly by the macOS client. +public struct HostBrowserRequest: Decodable, Sendable { + public let type: String + public let requestId: String + public let conversationId: String + public let cdpMethod: String + public let cdpParams: [String: AnyCodable]? + public let cdpSessionId: String? + // Modeled as Double? to match the daemon's `timeout_seconds?: number` wire + // contract (which permits fractional values such as 0.01) and to mirror + // `HostBashRequest.timeoutSeconds`. Using Int? here would cause + // JSONDecoder to throw a type-mismatch on fractional timeouts and drop the + // entire host_browser_request event from the SSE stream. + public let timeoutSeconds: Double? 
+ + private enum CodingKeys: String, CodingKey { + case type + case requestId + case conversationId + case cdpMethod + case cdpParams + case cdpSessionId + // The daemon wire format for this field is snake_case while the + // sibling fields above are camelCase, so map it explicitly. + case timeoutSeconds = "timeout_seconds" + } +} + +/// Cancellation signal from the daemon telling the host browser to abort an +/// in-flight CDP command identified by `requestId`. As with `HostBrowserRequest` +/// the macOS client only decodes this to keep the SSE stream healthy. +public struct HostBrowserCancelRequest: Decodable, Sendable { + public let type: String + public let requestId: String +} + /// Payload posted back to the daemon with the result of a host CU action execution. public struct HostCuResultPayload: Codable, Sendable { public let requestId: String @@ -2279,6 +2324,8 @@ public enum ServerMessage: Decodable, Sendable { case hostFileCancel(HostFileCancelRequest) case hostCuRequest(HostCuRequest) case hostCuCancel(HostCuCancelRequest) + case hostBrowserRequest(HostBrowserRequest) + case hostBrowserCancel(HostBrowserCancelRequest) case permissionModeUpdate(PermissionModeUpdateMessage) case usageUpdate(UsageUpdate) case serviceGroupUpdateStarting(ServiceGroupUpdateStartingMessage) @@ -2733,6 +2780,12 @@ public enum ServerMessage: Decodable, Sendable { case "host_cu_cancel": let message = try HostCuCancelRequest(from: decoder) self = .hostCuCancel(message) + case "host_browser_request": + let message = try HostBrowserRequest(from: decoder) + self = .hostBrowserRequest(message) + case "host_browser_cancel": + let message = try HostBrowserCancelRequest(from: decoder) + self = .hostBrowserCancel(message) case "permission_mode_update": let message = try PermissionModeUpdateMessage(from: decoder) self = .permissionModeUpdate(message) diff --git a/clients/shared/Tests/MessageTypesTests.swift b/clients/shared/Tests/MessageTypesTests.swift new file mode 100644 index 
00000000000..7eeb0ec1cfd --- /dev/null +++ b/clients/shared/Tests/MessageTypesTests.swift @@ -0,0 +1,135 @@ +import XCTest + +@testable import VellumAssistantShared + +/// Unit tests for `ServerMessage` discriminated-union decoding. +/// +/// Phase 2 of the Host Browser Proxy work added `host_browser_request` and +/// `host_browser_cancel` cases. These tests assert the SSE decoder does not +/// fail-closed on those types and that the payload fields round-trip cleanly. +final class MessageTypesTests: XCTestCase { + private let decoder = JSONDecoder() + + // MARK: - host_browser_request + + func testDecodes_hostBrowserRequest_withAllFields() throws { + let json = Data( + """ + { + "type": "host_browser_request", + "requestId": "req-abc-123", + "conversationId": "conv-xyz-789", + "cdpMethod": "Page.navigate", + "cdpParams": { + "url": "https://example.com", + "transitionType": "typed" + }, + "cdpSessionId": "session-555", + "timeout_seconds": 45.5 + } + """.utf8 + ) + + let message = try decoder.decode(ServerMessage.self, from: json) + + guard case .hostBrowserRequest(let request) = message else { + XCTFail("Expected .hostBrowserRequest, got \(message)") + return + } + + XCTAssertEqual(request.type, "host_browser_request") + XCTAssertEqual(request.requestId, "req-abc-123") + XCTAssertEqual(request.conversationId, "conv-xyz-789") + XCTAssertEqual(request.cdpMethod, "Page.navigate") + XCTAssertEqual(request.cdpSessionId, "session-555") + XCTAssertEqual(request.timeoutSeconds, 45.5) + + let params = try XCTUnwrap(request.cdpParams) + XCTAssertEqual(params["url"]?.value as? String, "https://example.com") + XCTAssertEqual(params["transitionType"]?.value as? String, "typed") + } + + /// Regression test for the typing fix that changed `timeoutSeconds` from + /// `Int?` to `Double?`. The daemon's wire contract is `timeout_seconds?: + /// number`, which permits fractional values such as `0.01`. 
With the old + /// `Int?` typing, `JSONDecoder` would throw a type-mismatch on this + /// payload and the SSE decoder would drop the entire `host_browser_request` + /// event — exactly the failure mode this Phase 2 PR is meant to prevent. + func testDecodes_hostBrowserRequest_withFractionalTimeoutSeconds() throws { + let json = Data( + """ + { + "type": "host_browser_request", + "requestId": "req-frac", + "conversationId": "conv-frac", + "cdpMethod": "Page.navigate", + "timeout_seconds": 0.01 + } + """.utf8 + ) + + let message = try decoder.decode(ServerMessage.self, from: json) + + guard case .hostBrowserRequest(let request) = message else { + XCTFail("Expected .hostBrowserRequest, got \(message)") + return + } + + XCTAssertEqual(request.type, "host_browser_request") + XCTAssertEqual(request.requestId, "req-frac") + XCTAssertEqual(request.conversationId, "conv-frac") + XCTAssertEqual(request.cdpMethod, "Page.navigate") + XCTAssertEqual(request.timeoutSeconds, 0.01) + } + + func testDecodes_hostBrowserRequest_withOptionalFieldsAbsent() throws { + let json = Data( + """ + { + "type": "host_browser_request", + "requestId": "req-min", + "conversationId": "conv-min", + "cdpMethod": "Browser.getVersion" + } + """.utf8 + ) + + let message = try decoder.decode(ServerMessage.self, from: json) + + guard case .hostBrowserRequest(let request) = message else { + XCTFail("Expected .hostBrowserRequest, got \(message)") + return + } + + XCTAssertEqual(request.type, "host_browser_request") + XCTAssertEqual(request.requestId, "req-min") + XCTAssertEqual(request.conversationId, "conv-min") + XCTAssertEqual(request.cdpMethod, "Browser.getVersion") + XCTAssertNil(request.cdpParams) + XCTAssertNil(request.cdpSessionId) + XCTAssertNil(request.timeoutSeconds) + } + + // MARK: - host_browser_cancel + + func testDecodes_hostBrowserCancel() throws { + let json = Data( + """ + { + "type": "host_browser_cancel", + "requestId": "req-abc-123" + } + """.utf8 + ) + + let message = try 
decoder.decode(ServerMessage.self, from: json) + + guard case .hostBrowserCancel(let cancel) = message else { + XCTFail("Expected .hostBrowserCancel, got \(message)") + return + } + + XCTAssertEqual(cancel.type, "host_browser_cancel") + XCTAssertEqual(cancel.requestId, "req-abc-123") + } +} diff --git a/docs/browser-use-architecture-phase2.md b/docs/browser-use-architecture-phase2.md new file mode 100644 index 00000000000..e96a926bff0 --- /dev/null +++ b/docs/browser-use-architecture-phase2.md @@ -0,0 +1,22 @@ +# Browser Use Architecture — Phase 2 notes + +## chrome.debugger infobar + +When the Chrome extension calls `chrome.debugger.attach(target, requiredVersion)`, Chrome displays a persistent yellow infobar at the top of the affected tab saying "Vellum started debugging this browser." This is an intentional security mitigation — it cannot be suppressed via the public MV3 API. + +### Investigation (Phase 2) + +- `chrome.debugger.attach(target, requiredVersion, callback)` — three-argument form, no options parameter. Chrome 120+. (https://developer.chrome.com/docs/extensions/reference/api/debugger) +- There is no `{ silent: true }` option on attach. +- The `--silent-debugger-extension-api` command-line flag exists for Chromium but (a) requires the user to launch Chrome with the flag, (b) is not enabled by default in stable channels, and (c) is not something we can enforce on end users. +- Chrome 126+ added `chrome.debugger.attach` acceptance via `targetId` / `tabId` but did not add a silent-mode option. +- Closing the infobar does not detach the debugger; it is purely informational. + +### Decision + +Accept the infobar. The TDD already concluded this; Phase 2 confirms no public API exists to suppress it. End-user messaging in the Mac app popup should explain that the banner is expected and normal when Vellum is driving the browser. 
+ +### Alternatives considered + +- Playwright / `chrome --remote-debugging-port` in a sacrificial profile avoids the infobar but requires installing Chromium and is out of scope (Phase 5). +- The Chrome 146+ `chrome://inspect` attach backend may offer a less intrusive UX and is being tracked for Phase 4.