// assistant/src/__tests__/fixtures/mock-chrome-extension.ts

/**
 * Mock Chrome extension test fixture.
 *
 * Opens a WebSocket to the runtime's `/v1/browser-relay` endpoint using a
 * caller-supplied JWT (so the upgrade handler registers the connection
 * under the guardianId encoded in the token), handles incoming
 * `host_browser_request` frames by calling a mock CDP proxy, and POSTs
 * the result back to `/v1/host-browser-result`.
 *
 * Used by e2e tests (PR 15/16) to exercise the full round-trip without
 * requiring a real Chrome browser or the real extension worker.
 *
 * The fixture is intentionally minimal — it does not implement heartbeats,
 * reconnect logic, or the legacy `ExtensionCommand` dispatch path. It only
 * needs to carry host_browser_request frames end-to-end.
 */

// ── Types ───────────────────────────────────────────────────────────

/** Incoming `host_browser_request` envelope (wire format). */
export interface HostBrowserRequestFrame {
  type: "host_browser_request";
  requestId: string;
  conversationId: string;
  cdpMethod: string;
  cdpParams?: Record<string, unknown>;
  cdpSessionId?: string;
  timeout_seconds?: number;
}

/** Incoming `host_browser_cancel` envelope (wire format). */
export interface HostBrowserCancelFrame {
  type: "host_browser_cancel";
  requestId: string;
}

/** Result body POSTed back to `/v1/host-browser-result`. */
export interface HostBrowserResultBody {
  requestId: string;
  content: string;
  isError: boolean;
}

/**
 * Callback that handles a CDP request and returns a
 * (content, isError) pair to be POSTed back to the runtime.
 *
 * Tests pass in a mock that simulates `chrome.debugger.sendCommand` for a
 * handful of methods (e.g. `Browser.getVersion`).
 */
export type MockCdpHandler = (
  frame: HostBrowserRequestFrame,
) => Promise<{ content: string; isError: boolean }>;

export interface MockChromeExtensionOptions {
  /** Base URL of the runtime HTTP server, e.g. `http://127.0.0.1:19801`. */
  runtimeBaseUrl: string;
  /** JWT bearer token for both the WebSocket handshake and the POST callback. */
  token: string;
  /**
   * CDP command handler. Defaults to a handler that recognises
   * `Browser.getVersion` and returns a fake product string.
   */
  cdpHandler?: MockCdpHandler;
  /**
   * Optional extra headers forwarded on the WebSocket handshake (e.g.
   * `x-guardian-id` when using a service token that doesn't carry an
   * actor principal id).
   */
  extraHandshakeHeaders?: Record<string, string>;
}

export interface MockChromeExtension {
  /**
   * Create the WebSocket and attach its listeners. Resolves as soon as the
   * socket object exists — it does NOT wait for the handshake. Await
   * `waitForConnection()` afterwards to know the socket is OPEN. Calling
   * `start()` while a socket is already held is a no-op.
   */
  start(): Promise<void>;
  /** Close the WebSocket and drop any in-flight request tracking. */
  stop(): Promise<void>;
  /**
   * Wait until the WebSocket has transitioned to OPEN. Useful to avoid
   * races between `start()` and the runtime's `register()` bookkeeping.
   * Rejects with an `Error` if the socket is not open within `timeoutMs`
   * (default 2000).
   */
  waitForConnection(timeoutMs?: number): Promise<void>;
  /** List of every `host_browser_request` frame received, in order. */
  receivedRequests(): ReadonlyArray<HostBrowserRequestFrame>;
  /** List of every `host_browser_cancel` frame received, in order. */
  receivedCancels(): ReadonlyArray<HostBrowserCancelFrame>;
  /** Swap the CDP handler at runtime (tests can inject failure modes). */
  setCdpHandler(handler: MockCdpHandler): void;
  /**
   * Force-close the WebSocket without going through the teardown path.
   * Simulates a flaky extension that drops the connection.
   */
  forceDisconnect(): void;
}

// ── Defaults ────────────────────────────────────────────────────────

const DEFAULT_MOCK_BROWSER_VERSION = {
  product: "Chrome/MockTest",
  protocolVersion: "1.3",
  revision: "@mock",
  userAgent: "Mozilla/5.0 (mock chrome-extension e2e fixture)",
  jsVersion: "0.0.0-mock",
};

/**
 * Default CDP handler: answers `Browser.getVersion` with a fake product
 * string. Unrecognised methods return an error envelope so tests can fail
 * fast instead of hanging.
 */
const defaultCdpHandler: MockCdpHandler = async (frame) => {
  if (frame.cdpMethod === "Browser.getVersion") {
    return {
      content: JSON.stringify(DEFAULT_MOCK_BROWSER_VERSION),
      isError: false,
    };
  }
  return {
    content: `mock-chrome-extension: unsupported cdpMethod "${frame.cdpMethod}"`,
    isError: true,
  };
};

// ── Implementation ──────────────────────────────────────────────────

/**
 * Create a mock chrome-extension client bound to the given runtime base
 * URL. The fixture does not start itself; callers must invoke `start()`.
 *
 * @param options Runtime location, bearer token, and optional CDP handler /
 *   handshake headers.
 * @returns A handle exposing lifecycle controls and the recorded frames.
 */
export function createMockChromeExtension(
  options: MockChromeExtensionOptions,
): MockChromeExtension {
  // Strip every trailing slash so the path joins below never produce `//`.
  const baseHttp = options.runtimeBaseUrl.replace(/\/+$/, "");
  // `http` → `ws` and `https` → `wss` (only the leading scheme prefix changes).
  const wsBase = baseHttp.replace(/^http/i, "ws");
  const wsUrl = `${wsBase}/v1/browser-relay?token=${encodeURIComponent(options.token)}`;

  let ws: WebSocket | null = null;
  let connected = false;
  let handler = options.cdpHandler ?? defaultCdpHandler;
  const receivedRequests: HostBrowserRequestFrame[] = [];
  const receivedCancels: HostBrowserCancelFrame[] = [];
  // requestId → controller used purely as a "was this cancelled?" flag; the
  // signal is not handed to the CDP handler.
  const inFlight = new Map<string, AbortController>();

  /** Run the CDP handler for one request and POST its outcome back. */
  async function handleRequestFrame(
    frame: HostBrowserRequestFrame,
  ): Promise<void> {
    const abortCtl = new AbortController();
    inFlight.set(frame.requestId, abortCtl);
    let result: { content: string; isError: boolean };
    try {
      result = await handler(frame);
    } catch (err) {
      // A throwing handler is reported as an error result, not a crash.
      result = {
        content: err instanceof Error ? err.message : String(err),
        isError: true,
      };
    } finally {
      inFlight.delete(frame.requestId);
    }
    // If the request was aborted mid-flight, drop the result entirely
    // (mirroring the production dispatcher, which doesn't POST a result
    // for cancelled requests).
    if (abortCtl.signal.aborted) return;

    const body: HostBrowserResultBody = {
      requestId: frame.requestId,
      content: result.content,
      isError: result.isError,
    };
    try {
      const res = await fetch(`${baseHttp}/v1/host-browser-result`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${options.token}`,
        },
        body: JSON.stringify(body),
      });
      // Consume the body so Bun doesn't leak the response handle.
      await res.body?.cancel();
    } catch {
      // Best-effort — if the runtime has torn down the server, the POST
      // will throw. Tests assert on proxy behaviour, not POST success.
    }
  }

  /** Parse one text frame and dispatch it by its `type` discriminator. */
  function handleMessage(raw: string): void {
    let parsed: unknown;
    try {
      parsed = JSON.parse(raw);
    } catch {
      // Non-JSON traffic is not ours; ignore it.
      return;
    }
    if (!parsed || typeof parsed !== "object") return;
    const frame = parsed as Record<string, unknown>;
    if (frame.type === "host_browser_request") {
      // Only `type` is checked; the remaining fields are trusted as sent.
      const typed = frame as unknown as HostBrowserRequestFrame;
      receivedRequests.push(typed);
      void handleRequestFrame(typed);
      return;
    }
    if (frame.type === "host_browser_cancel") {
      const typed = frame as unknown as HostBrowserCancelFrame;
      receivedCancels.push(typed);
      const abort = inFlight.get(typed.requestId);
      if (abort) {
        abort.abort();
        inFlight.delete(typed.requestId);
      }
      return;
    }
    // Ignore any other frames (e.g. legacy ExtensionCommand traffic).
  }

  return {
    async start() {
      if (ws) return;
      // Bun's `WebSocket` constructor accepts a second-argument options
      // object with a `headers` field (a Bun-specific extension of the
      // standard WebSocket API). We forward `extraHandshakeHeaders`
      // through it so tests using service tokens can supply the
      // `x-guardian-id` fallback expected by `/v1/browser-relay`.
      //
      // We cast through `unknown` because the DOM `WebSocket` type only
      // knows about `(url, protocols)`. If this fixture is ever run in
      // an environment that isn't Bun, the options object would be
      // silently ignored — acceptable for a test fixture.
      const wsOptions: { headers?: Record<string, string> } = {};
      if (options.extraHandshakeHeaders) {
        wsOptions.headers = options.extraHandshakeHeaders;
      }
      const sock = new WebSocket(
        wsUrl,
        wsOptions as unknown as string | string[],
      );
      ws = sock;
      // A socket replaced via `forceDisconnect()`/`stop()` + `start()` can
      // still emit `open`/`close` later. Only the current socket may touch
      // `connected`, otherwise a late `close` from the old socket would
      // mark the new, healthy connection as down.
      const isCurrent = (): boolean => ws === sock;
      sock.addEventListener("open", () => {
        if (isCurrent()) connected = true;
      });
      sock.addEventListener("message", (ev: MessageEvent) => {
        const data = ev.data;
        if (typeof data === "string") {
          handleMessage(data);
        } else if (data instanceof ArrayBuffer || ArrayBuffer.isView(data)) {
          // Binary frames may arrive as an ArrayBuffer or a typed-array
          // view depending on the socket's binaryType.
          handleMessage(new TextDecoder().decode(data));
        }
      });
      sock.addEventListener("close", () => {
        if (isCurrent()) connected = false;
      });
    },
    async stop() {
      const sock = ws;
      ws = null;
      // Reset synchronously: the socket is detached above, so its own
      // `close` event no longer updates the flag.
      connected = false;
      if (sock) {
        try {
          sock.close(1000, "fixture shutdown");
        } catch {
          // best-effort
        }
      }
      // Mark every pending request cancelled so late handler results are
      // dropped instead of POSTed.
      for (const abort of inFlight.values()) {
        abort.abort();
      }
      inFlight.clear();
    },
    async waitForConnection(timeoutMs = 2000) {
      const deadline = Date.now() + timeoutMs;
      while (!connected) {
        if (Date.now() > deadline) {
          throw new Error(
            `mock-chrome-extension: timed out waiting for WebSocket OPEN after ${timeoutMs}ms`,
          );
        }
        await new Promise((r) => setTimeout(r, 10));
      }
    },
    receivedRequests() {
      return receivedRequests;
    },
    receivedCancels() {
      return receivedCancels;
    },
    setCdpHandler(next) {
      handler = next;
    },
    forceDisconnect() {
      const sock = ws;
      ws = null;
      connected = false;
      if (sock) {
        try {
          sock.close(4000, "forced disconnect");
        } catch {
          // best-effort
        }
      }
    },
  };
}
// assistant/src/__tests__/host-browser-e2e-cloud.test.ts

/**
 * E2E smoke test for the cloud-hosted `host_browser_request` round-trip.
 *
 * Boots the runtime HTTP server in-process, opens a mock chrome-extension
 * WebSocket against `/v1/browser-relay`, and drives
 * `HostBrowserProxy.request()` end-to-end:
 *
 *   proxy.request()
 *     → sendToClient (routed via ChromeExtensionRegistry by guardianId)
 *     → mock extension WebSocket receives host_browser_request
 *     → mock CDP handler (Browser.getVersion fake)
 *     → POST /v1/host-browser-result
 *     → handleHostBrowserResult → conversation.resolveHostBrowser
 *     → proxy.resolve() → request() resolves
 *
 * Covers:
 *   - Happy path: Browser.getVersion round-trips and returns the fake
 *     product string.
 *   - Abort: an aborted AbortSignal resolves with "Aborted" and the mock
 *     extension receives a host_browser_cancel frame.
 *   - Timeout: if the mock extension receives the frame but never
 *     POSTs a result, the proxy's setTimeout path fires and surfaces
 *     a "timed out waiting for client response" error.
 *
 * The test runs entirely in Bun + loopback WebSocket/fetch — no real
 * Chrome required.
 */
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";

// ── Module mocks (must be declared before the real imports below) ────

mock.module("../util/logger.js", () => ({
  // Every property access yields a no-op function, so any logger method
  // the runtime calls is silently swallowed.
  getLogger: () =>
    new Proxy({} as Record<string, unknown>, {
      get: () => () => {},
    }),
}));

mock.module("../config/loader.js", () => ({
  getConfig: () => ({
    ui: {},
    model: "test",
    provider: "test",
    memory: { enabled: false },
    rateLimit: { maxRequestsPerMinute: 0 },
    secretDetection: { enabled: false },
    contextWindow: { maxInputTokens: 200000 },
    services: {
      inference: {
        mode: "your-own",
        provider: "anthropic",
        model: "claude-opus-4-6",
      },
      "image-generation": {
        mode: "your-own",
        provider: "gemini",
        model: "gemini-3.1-flash-image-preview",
      },
      "web-search": { mode: "your-own", provider: "inference-provider-native" },
    },
  }),
}));

// ── Real imports (after mocks) ──────────────────────────────────────

import type { Conversation } from "../daemon/conversation.js";
import { HostBrowserProxy } from "../daemon/host-browser-proxy.js";
import type { ServerMessage } from "../daemon/message-protocol.js";
import { getDb, initializeDb } from "../memory/db.js";
import { mintToken } from "../runtime/auth/token-service.js";
import {
  __resetChromeExtensionRegistryForTests,
  getChromeExtensionRegistry,
} from "../runtime/chrome-extension-registry.js";
import { RuntimeHttpServer } from "../runtime/http-server.js";
import * as pendingInteractions from "../runtime/pending-interactions.js";
// Type-only: erased at compile time, so the fixture module is still loaded
// lazily by the dynamic import in `connectMockExtension`.
import type {
  MockCdpHandler,
  MockChromeExtension,
} from "./fixtures/mock-chrome-extension.js";

initializeDb();

// ── Helpers ─────────────────────────────────────────────────────────

/**
 * Wrap a HostBrowserProxy in a sendToClient that:
 *   1. Routes host_browser_request/host_browser_cancel via the Chrome
 *      extension registry for the given guardianId.
 *   2. Registers a pending interaction for each request so the
 *      `/v1/host-browser-result` HTTP route can find the stub
 *      conversation and call `resolveHostBrowser` on it.
 *
 * Returns the proxy and its stub conversation. In production this
 * wiring lives in `conversation-routes.ts` `makeHubPublisher`; the test
 * reproduces the minimum surface needed for the round-trip.
 */
function createBoundProxy(
  guardianId: string,
  conversationId: string,
): { proxy: HostBrowserProxy; conversation: Conversation } {
  // The stub Conversation's `resolveHostBrowser` routes straight back
  // to the real proxy. Declare the proxy reference first so the stub
  // can close over it before the proxy itself is constructed below.
  let proxyRef: HostBrowserProxy | null = null;
  const conversation = {
    resolveHostBrowser(
      requestId: string,
      response: { content: string; isError: boolean },
    ) {
      proxyRef?.resolve(requestId, response);
    },
  } as unknown as Conversation;

  const sendToClient = (msg: ServerMessage) => {
    // Register pending interactions for host_browser_request envelopes
    // so the /v1/host-browser-result route can look them up.
    if ((msg as { type: string }).type === "host_browser_request") {
      const requestId = (msg as { requestId: string }).requestId;
      pendingInteractions.register(requestId, {
        conversation,
        conversationId,
        kind: "host_browser",
      });
    }
    const ok = getChromeExtensionRegistry().send(guardianId, msg);
    if (!ok) {
      throw new Error(
        `chrome-extension host_browser send failed: no active connection for guardian ${guardianId}`,
      );
    }
  };

  const proxy = new HostBrowserProxy(sendToClient);
  proxyRef = proxy;
  return { proxy, conversation };
}

/**
 * Mint an actor-bound JWT for the given guardianId. The WebSocket
 * upgrade handler parses `sub=actor:<assistantId>:<actorPrincipalId>`
 * and treats `actorPrincipalId` as the guardianId.
 */
function mintActorToken(guardianId: string): string {
  return mintToken({
    aud: "vellum-daemon",
    sub: `actor:self:${guardianId}`,
    scope_profile: "actor_client_v1",
    policy_epoch: 1,
    ttlSeconds: 3600,
  });
}

/**
 * Create a mock extension for `guardianId`, connect it to the runtime, and
 * wait until the runtime's registry knows about the connection.
 *
 * The fixture is loaded with a dynamic import: this keeps the module cache
 * warm across tests but avoids binding the fixture at file-load time (where
 * the mocks might not yet have applied for a freshly forked test worker).
 *
 * If any connect step fails the socket is stopped before the error is
 * rethrown, so a failed setup does not leak a connection into later tests.
 */
async function connectMockExtension(
  runtimeBaseUrl: string,
  guardianId: string,
  cdpHandler?: MockCdpHandler,
): Promise<MockChromeExtension> {
  const { createMockChromeExtension } =
    await import("./fixtures/mock-chrome-extension.js");
  const mockExt = createMockChromeExtension({
    runtimeBaseUrl,
    token: mintActorToken(guardianId),
    ...(cdpHandler ? { cdpHandler } : {}),
  });
  try {
    await mockExt.start();
    await mockExt.waitForConnection();
    // Give the open handler a tick to register the connection in the
    // ChromeExtensionRegistry (Bun's WebSocket open callback fires
    // asynchronously after the upgrade handler returns).
    await waitForRegistryEntry(guardianId);
  } catch (err) {
    await mockExt.stop();
    throw err;
  }
  return mockExt;
}

// ── Tests ───────────────────────────────────────────────────────────

describe("host_browser cloud-hosted e2e round-trip", () => {
  let server: RuntimeHttpServer;
  let port: number;
  let runtimeBaseUrl: string;

  beforeEach(async () => {
    // Each test gets a clean DB and a fresh registry so connection
    // state doesn't leak between cases.
    const db = getDb();
    db.run("DELETE FROM contact_channels");
    db.run("DELETE FROM contacts");
    pendingInteractions.clear();
    __resetChromeExtensionRegistryForTests();

    // NOTE(review): a random port in [19800, 19999] can collide with another
    // listener or a parallel test worker. If RuntimeHttpServer supports an
    // OS-assigned port, prefer that — TODO confirm.
    port = 19800 + Math.floor(Math.random() * 200);
    runtimeBaseUrl = `http://127.0.0.1:${port}`;
    server = new RuntimeHttpServer({ port });
    await server.start();
  });

  afterEach(async () => {
    await server?.stop();
    pendingInteractions.clear();
    __resetChromeExtensionRegistryForTests();
  });

  test("happy path: Browser.getVersion round-trips through the mock extension", async () => {
    const guardianId = `test-guardian-${crypto.randomUUID()}`;
    const mockExt = await connectMockExtension(runtimeBaseUrl, guardianId);
    const { proxy } = createBoundProxy(guardianId, "conv-happy");

    try {
      const result = await proxy.request(
        { cdpMethod: "Browser.getVersion" },
        "conv-happy",
      );

      expect(result.isError).toBe(false);
      expect(result.content).toContain("Chrome/MockTest");

      const received = mockExt.receivedRequests();
      expect(received).toHaveLength(1);
      expect(received[0].cdpMethod).toBe("Browser.getVersion");
      expect(typeof received[0].requestId).toBe("string");
      expect(received[0].conversationId).toBe("conv-happy");
    } finally {
      // Always tear down, even when an assertion above throws.
      proxy.dispose();
      await mockExt.stop();
    }
  });

  test("abort: AbortSignal resolves to 'Aborted' and extension receives host_browser_cancel", async () => {
    const guardianId = `test-guardian-${crypto.randomUUID()}`;
    const mockExt = await connectMockExtension(
      runtimeBaseUrl,
      guardianId,
      // Hang forever so we can abort mid-flight without a race against
      // the default handler's immediate response.
      () => new Promise(() => {}),
    );
    const { proxy } = createBoundProxy(guardianId, "conv-abort");

    try {
      const controller = new AbortController();
      const resultPromise = proxy.request(
        { cdpMethod: "Browser.getVersion" },
        "conv-abort",
        controller.signal,
      );

      // Wait for the mock extension to observe the request, then abort so
      // the cancel envelope has somewhere to land.
      await waitFor(() => mockExt.receivedRequests().length === 1);

      controller.abort();
      const result = await resultPromise;

      expect(result.content).toBe("Aborted");
      expect(result.isError).toBe(true);

      // The cancel frame is dispatched synchronously from the abort
      // listener, but the WebSocket delivers it asynchronously — give it a
      // few turns to arrive before asserting.
      await waitFor(() => mockExt.receivedCancels().length === 1);
      const cancels = mockExt.receivedCancels();
      expect(cancels).toHaveLength(1);
      expect(cancels[0].requestId).toBe(mockExt.receivedRequests()[0].requestId);
    } finally {
      proxy.dispose();
      await mockExt.stop();
    }
  });

  test("timeout: proxy.request resolves with timeout error when client never responds", async () => {
    const guardianId = `test-guardian-${crypto.randomUUID()}`;
    // CDP handler that never resolves — the request frame reaches the
    // mock extension successfully, but no result is ever POSTed back.
    // This exercises the proxy's `setTimeout` path (as opposed to a
    // synchronous send failure, which is a separate code path).
    const mockExt = await connectMockExtension(
      runtimeBaseUrl,
      guardianId,
      () => new Promise(() => {}),
    );
    const { proxy } = createBoundProxy(guardianId, "conv-timeout");

    try {
      // 50ms timeout — short enough to keep the test fast, long enough
      // for the request frame to make the WS round-trip to the mock
      // extension before the timer fires.
      const result = await proxy.request(
        { cdpMethod: "Browser.getVersion", timeout_seconds: 0.05 },
        "conv-timeout",
      );

      expect(result.isError).toBe(true);
      expect(result.content).toContain("timed out");

      // Sanity check: the frame actually reached the mock extension (so
      // we know we're exercising the proxy's timer, not a send failure).
      expect(mockExt.receivedRequests()).toHaveLength(1);
      expect(mockExt.receivedRequests()[0].cdpMethod).toBe("Browser.getVersion");
    } finally {
      proxy.dispose();
      await mockExt.stop();
    }
  });
});

// ── Local wait helpers ──────────────────────────────────────────────

/**
 * Poll `predicate` every 10ms until it returns true.
 *
 * @throws Error if the predicate is still false once `timeoutMs` has elapsed.
 */
async function waitFor(
  predicate: () => boolean,
  timeoutMs = 2000,
): Promise<void> {
  const deadline = Date.now() + timeoutMs;
  while (!predicate()) {
    if (Date.now() > deadline) {
      throw new Error(
        `waitFor: predicate did not become true within ${timeoutMs}ms`,
      );
    }
    await new Promise((r) => setTimeout(r, 10));
  }
}

/** Wait until the Chrome extension registry has an entry for `guardianId`. */
async function waitForRegistryEntry(
  guardianId: string,
  timeoutMs = 2000,
): Promise<void> {
  await waitFor(
    () => getChromeExtensionRegistry().get(guardianId) !== undefined,
    timeoutMs,
  );
}