From 26830f5c7a861da775d2a882101a4f3a7ff38f07 Mon Sep 17 00:00:00 2001
From: Noa Flaherty <noa@vellum.ai>
Date: Tue, 7 Apr 2026 20:54:06 -0400
Subject: [PATCH] fix(chrome-extension-native-host): halt unauthorized origins
 and forward guardianId

---
 .../host-browser-e2e-self-hosted.test.ts      |  17 +
 .../src/__tests__/index.test.ts               | 325 ++++++++++++++++++
 .../chrome-extension-native-host/src/index.ts |  79 +++--
 3 files changed, 383 insertions(+), 38 deletions(-)
 create mode 100644 clients/chrome-extension-native-host/src/__tests__/index.test.ts

diff --git a/assistant/src/__tests__/host-browser-e2e-self-hosted.test.ts b/assistant/src/__tests__/host-browser-e2e-self-hosted.test.ts
index 11b1cb98a06..69aac868558 100644
--- a/assistant/src/__tests__/host-browser-e2e-self-hosted.test.ts
+++ b/assistant/src/__tests__/host-browser-e2e-self-hosted.test.ts
@@ -282,12 +282,25 @@ describe("host-browser E2E — self-hosted native messaging path", () => {
         type: string;
         token?: string;
         expiresAt?: string;
+        guardianId?: string;
       };
       expect(frame.type).toBe("token_response");
       expect(typeof frame.token).toBe("string");
       expect(frame.token!.length).toBeGreaterThan(0);
       expect(typeof frame.expiresAt).toBe("string");
 
+      // Gap 3 regression guard: the helper must surface the
+      // guardianId returned by /v1/browser-extension-pair on the
+      // native-messaging frame so the chrome extension's
+      // bootstrapLocalToken() can persist it. The route's
+      // resolveLocalGuardianId() falls back to the literal string
+      // "local" when no vellum guardian is bootstrapped, which is
+      // the case in this test environment, so we assert against the
+      // exact value as well as a non-empty type guard.
+      expect(typeof frame.guardianId).toBe("string");
+      expect(frame.guardianId!.length).toBeGreaterThan(0);
+      expect(frame.guardianId).toBe("local");
+
       // The returned token must verify via the in-process capability
       // verifier — this is the core invariant the native-messaging
       // bootstrap promises. The daemon is the only party that could
@@ -298,6 +311,10 @@ describe("host-browser E2E — self-hosted native messaging path", () => {
       expect(claims?.capability).toBe("host_browser_command");
       expect(typeof claims?.guardianId).toBe("string");
       expect(claims?.guardianId.length).toBeGreaterThan(0);
+      // The frame's guardianId should match the claim's guardianId —
+      // both originate from the same `resolveLocalGuardianId()` call
+      // inside the route handler.
+      expect(frame.guardianId).toBe(claims?.guardianId);
       // expiresAt in the response frame should agree with the numeric
       // claim expiry to within ISO-string precision.
       const iso = new Date(claims!.expiresAt).toISOString();
diff --git a/clients/chrome-extension-native-host/src/__tests__/index.test.ts b/clients/chrome-extension-native-host/src/__tests__/index.test.ts
new file mode 100644
index 00000000000..a8d7e649091
--- /dev/null
+++ b/clients/chrome-extension-native-host/src/__tests__/index.test.ts
@@ -0,0 +1,325 @@
+/**
+ * Subprocess regression tests for the Chrome native messaging helper —
+ * targeting the two P1 bugs surfaced by the PR #24159 self-review:
+ *
+ *   - **Gap 2**: the unauthorized-origin branch must terminate the
+ *     process synchronously and never POST `/v1/browser-extension-pair`.
+ *     The previous `writeFrameAndExit` returned a forever-pending Promise
+ *     after queuing the write, which meant `await`-ing it yielded the
+ *     event loop and let the stdin listener run before the helper
+ *     actually exited.
+ *
+ *   - **Gap 3**: the helper must surface the `guardianId` field returned
+ *     by `/v1/browser-extension-pair` in its `token_response` frame.
+ *     Earlier versions destructured only `{ token, expiresAt }` and
+ *     silently dropped the field, causing `bootstrapLocalToken()` on the
+ *     consumer side to reject the frame as malformed.
+ *
+ * These tests are intentionally separate from `integration.test.ts` so
+ * the regression coverage is colocated with the bugs it pins down — if
+ * either gap regresses, the failure points directly at the responsible
+ * scenario rather than getting buried in the broader integration suite.
+ *
+ * The suite skips gracefully when `dist/index.js` is missing so cold
+ * checkouts don't break CI.
+ */
+
+import { existsSync } from "node:fs";
+import { dirname, resolve } from "node:path";
+import { fileURLToPath } from "node:url";
+
+import { afterAll, beforeAll, describe, expect, test } from "bun:test";
+
+import { decodeFrames, encodeFrame } from "../protocol.js";
+
+// ---------------------------------------------------------------------------
+// Paths & skip guard
+// ---------------------------------------------------------------------------
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+
+const HELPER_BINARY = resolve(__dirname, "../../dist/index.js");
+const HELPER_EXISTS = existsSync(HELPER_BINARY);
+
+const SKIP_REASON =
+  "clients/chrome-extension-native-host/dist/index.js is missing — run `bun run build` in clients/chrome-extension-native-host to enable these tests.";
+
+// The native helper hard-codes a placeholder allowlist with this single
+// dev id. The companion route in
+// `assistant/src/runtime/routes/browser-extension-pair-routes.ts` mirrors
+// it; if either side ever diverges, both these tests and the e2e
+// self-hosted test in `assistant/src/__tests__/` will fail loudly.
+const ALLOWED_ORIGIN = "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/";
+const DISALLOWED_ORIGIN =
+  "chrome-extension://bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/";
+
+// ---------------------------------------------------------------------------
+// Mock pair-endpoint server
+// ---------------------------------------------------------------------------
+
+interface MockPairServer {
+  server: ReturnType<typeof Bun.serve>;
+  port: number;
+  /** All requests received by the mock server, in order. */
+  requests: Array<{ pathname: string; body: unknown }>;
+  /** Body the next pair request should return. */
+  nextResponseBody: () => Record<string, unknown>;
+  stop: () => void;
+}
+
+/**
+ * Boot a tiny `Bun.serve` listener on a free port that records every
+ * request and responds with whatever `nextResponseBody` produces. The
+ * test can mutate `nextResponseBody` between scenarios to swap fixtures
+ * (e.g. drop `guardianId` from the response to exercise the
+ * malformed-frame rejection path).
+ */
+function startMockPairServer(): MockPairServer {
+  const state: MockPairServer = {
+    server: null as unknown as ReturnType<typeof Bun.serve>,
+    port: 0,
+    requests: [],
+    nextResponseBody: () => ({
+      token: "tok-1",
+      expiresAt: "2026-12-31T00:00:00Z",
+      guardianId: "g-1",
+    }),
+    stop: () => {
+      /* replaced below */
+    },
+  };
+
+  const server = Bun.serve({
+    port: 0,
+    hostname: "127.0.0.1",
+    async fetch(req) {
+      const url = new URL(req.url);
+      let body: unknown = null;
+      try {
+        body = await req.json();
+      } catch {
+        body = null;
+      }
+      state.requests.push({ pathname: url.pathname, body });
+
+      if (
+        url.pathname !== "/v1/browser-extension-pair" ||
+        req.method !== "POST"
+      ) {
+        return new Response("not found", { status: 404 });
+      }
+
+      return Response.json(state.nextResponseBody());
+    },
+  });
+
+  state.server = server;
+  state.port = server.port as number;
+  state.stop = () => server.stop(true);
+  return state;
+}
+
+// ---------------------------------------------------------------------------
+// Subprocess helpers
+// ---------------------------------------------------------------------------
+
+interface HelperRunResult {
+  frames: unknown[];
+  stderr: string;
+  exitCode: number;
+}
+
+/**
+ * Spawn the helper as a subprocess via `Bun.spawn`, write the framed
+ * `request_token` payload to its stdin, and collect stdout / stderr /
+ * exit code with a hard upper bound on wall clock time. We deliberately
+ * use `Bun.spawn` here (rather than node:child_process as the e2e test
+ * does) so the test surface matches the PR plan and so we get
+ * `proc.exited` as a clean Promise.
+ */
+async function runHelper(options: {
+  extensionOrigin: string;
+  assistantPort: number;
+  stdinBytes: Buffer;
+  timeoutMs?: number;
+}): Promise<HelperRunResult> {
+  const args = [
+    "node",
+    HELPER_BINARY,
+    options.extensionOrigin,
+    "--assistant-port",
+    String(options.assistantPort),
+  ];
+
+  const proc = Bun.spawn(args, {
+    stdin: "pipe",
+    stdout: "pipe",
+    stderr: "pipe",
+    env: { ...process.env },
+  });
+
+  proc.stdin.write(options.stdinBytes);
+  await proc.stdin.end();
+
+  let timedOut = false;
+  const timeoutMs = options.timeoutMs ?? 1000;
+  const timer = setTimeout(() => {
+    timedOut = true;
+    try {
+      proc.kill("SIGKILL");
+    } catch {
+      /* already exited */
+    }
+  }, timeoutMs);
+
+  const exitCode = await proc.exited;
+  clearTimeout(timer);
+
+  const stdoutBuffer = Buffer.from(await new Response(proc.stdout).arrayBuffer());
+  const stderrText = await new Response(proc.stderr).text();
+
+  if (timedOut) {
+    throw new Error(
+      `helper binary did not exit within ${timeoutMs}ms — stderr: ${stderrText}`,
+    );
+  }
+
+  let frames: unknown[];
+  try {
+    frames = decodeFrames(stdoutBuffer).frames;
+  } catch (err) {
+    const detail = err instanceof Error ? err.message : String(err);
+    throw new Error(
+      `failed to decode helper stdout frames: ${detail}; raw stderr: ${stderrText}`,
+    );
+  }
+
+  return { frames, stderr: stderrText, exitCode };
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+describe("native host — Gap 2 / Gap 3 regression coverage", () => {
+  let pair: MockPairServer | null = null;
+
+  beforeAll(() => {
+    if (!HELPER_EXISTS) return;
+    pair = startMockPairServer();
+    // Sanity-check the bound port so a wrong-port misconfig doesn't
+    // silently masquerade as a network failure inside the helper.
+    if (!pair.port) {
+      throw new Error("mock pair server failed to bind");
+    }
+  });
+
+  afterAll(() => {
+    if (pair) pair.stop();
+  });
+
+  if (!HELPER_EXISTS) {
+    test.skip(`native helper binary not built — ${SKIP_REASON}`, () => {
+      /* intentionally empty */
+    });
+    return;
+  }
+
+  test("Gap 2: unauthorized origin halts before contacting the pair endpoint", async () => {
+    const srv = pair!;
+    srv.requests.length = 0;
+
+    const result = await runHelper({
+      extensionOrigin: DISALLOWED_ORIGIN,
+      assistantPort: srv.port,
+      // Pre-write a request_token frame so that if the unauthorized
+      // branch ever falls through to the stdin listener (the Gap 2
+      // bug), the helper would observe a frame and POST the pair
+      // endpoint. Both of those side effects are asserted below.
+      stdinBytes: encodeFrame({ type: "request_token" }),
+      timeoutMs: 1000,
+    });
+
+    // The helper must terminate with a non-zero exit code (we use the
+    // documented value of 1) within the 1s timeout. If `runHelper`
+    // throws on timeout, this assertion will not run.
+    expect(result.exitCode).toBe(1);
+
+    // Exactly one error frame on stdout — never a token_response.
+    expect(result.frames).toHaveLength(1);
+    const frame = result.frames[0] as { type?: unknown; message?: unknown };
+    expect(frame.type).toBe("error");
+    expect(frame.message).toBe("unauthorized_origin");
+
+    // The critical Gap 2 invariant: the helper must NOT have POSTed
+    // anything to /v1/browser-extension-pair. If the unauthorized
+    // branch falls through and the stdin listener runs, the mock
+    // server's `requests` array will contain at least one entry.
+    expect(srv.requests).toHaveLength(0);
+  });
+
+  test("Gap 3: authorized origin forwards guardianId in the token_response frame", async () => {
+    const srv = pair!;
+    srv.requests.length = 0;
+    srv.nextResponseBody = () => ({
+      token: "tok-1",
+      expiresAt: "2026-12-31T00:00:00Z",
+      guardianId: "g-1",
+    });
+
+    const result = await runHelper({
+      extensionOrigin: ALLOWED_ORIGIN,
+      assistantPort: srv.port,
+      stdinBytes: encodeFrame({ type: "request_token" }),
+      timeoutMs: 2000,
+    });
+
+    expect(result.exitCode, `helper stderr: ${result.stderr}`).toBe(0);
+    expect(result.frames).toHaveLength(1);
+
+    const frame = result.frames[0] as {
+      type?: unknown;
+      token?: unknown;
+      expiresAt?: unknown;
+      guardianId?: unknown;
+    };
+    expect(frame.type).toBe("token_response");
+    expect(frame.token).toBe("tok-1");
+    expect(frame.expiresAt).toBe("2026-12-31T00:00:00Z");
+    expect(frame.guardianId).toBe("g-1");
+
+    // The helper should have made exactly one POST to the pair
+    // endpoint, carrying the extension origin we passed on argv.
+    expect(srv.requests).toHaveLength(1);
+    expect(srv.requests[0]!.pathname).toBe("/v1/browser-extension-pair");
+    expect(srv.requests[0]!.body).toEqual({ extensionOrigin: ALLOWED_ORIGIN });
+  });
+
+  test("Gap 3: missing guardianId in the pair response is rejected with an error frame", async () => {
+    const srv = pair!;
+    srv.requests.length = 0;
+    // Mock returns a body without `guardianId`. The helper's
+    // request-token validation should catch the missing field and
+    // surface an error frame instead of writing a malformed
+    // token_response.
+    srv.nextResponseBody = () => ({
+      token: "tok-1",
+      expiresAt: "2026-12-31T00:00:00Z",
+    });
+
+    const result = await runHelper({
+      extensionOrigin: ALLOWED_ORIGIN,
+      assistantPort: srv.port,
+      stdinBytes: encodeFrame({ type: "request_token" }),
+      timeoutMs: 2000,
+    });
+
+    expect(result.exitCode).not.toBe(0);
+    expect(result.frames).toHaveLength(1);
+    const frame = result.frames[0] as { type?: unknown; message?: unknown };
+    expect(frame.type).toBe("error");
+    expect(typeof frame.message).toBe("string");
+    expect(frame.message).toMatch(/guardianId/);
+  });
+});
diff --git a/clients/chrome-extension-native-host/src/index.ts b/clients/chrome-extension-native-host/src/index.ts
index e0acdc04399..3a6c48b6bf4 100644
--- a/clients/chrome-extension-native-host/src/index.ts
+++ b/clients/chrome-extension-native-host/src/index.ts
@@ -18,7 +18,7 @@
  *      `--assistant-port`, then `~/.vellum/runtime-port`, then defaulting to
  *      7821).
  *   4. Echo the assistant's response back to Chrome as a
- *      `{ type: "token_response", token, expiresAt }` frame.
+ *      `{ type: "token_response", token, expiresAt, guardianId }` frame.
  *   5. On any unrecoverable error, write a `{ type: "error", message }` frame
  *      and exit with a non-zero status.
  *
@@ -55,6 +55,7 @@ const RUNTIME_PORT_FILE = join(homedir(), ".vellum", "runtime-port");
 interface TokenResponse {
   token: string;
   expiresAt: string;
+  guardianId: string;
 }
 
 /**
@@ -132,29 +133,33 @@ function parseExtensionId(origin: string | undefined): string | null {
 }
 
 /**
- * Write a single native-messaging frame to stdout and exit the process
- * once the underlying write has actually been flushed.
+ * Write a single native-messaging frame to stdout and synchronously
+ * terminate the process.
  *
  * Chrome native messaging hosts communicate over a pipe-backed stdout.
- * `process.stdout.write()` may buffer data internally and flush
- * asynchronously, so calling `process.exit()` immediately after a write
- * can tear the process down before the frame ever reaches Chrome — the
- * extension then sees a disconnect instead of the intended response or
- * error frame. Using the callback form of `write()` defers `exit()` until
- * the buffer has been handed off, which is what we want here.
+ * Earlier versions of this helper used the callback form of
+ * `process.stdout.write()` to defer `process.exit()` until the buffer had
+ * been handed off, but that introduced a subtle bug: the function had to
+ * return a forever-pending Promise to satisfy the `never` return type,
+ * meaning callers that awaited it would yield control of the event loop
+ * before the process terminated. In the unauthorized-origin path that
+ * meant the stdin listener could be installed and start handling frames
+ * before the helper actually exited.
  *
- * The function never returns to the caller (declared as `never`), so it
- * is safe to use in code paths whose control flow assumes the process
- * has terminated.
+ * The synchronous form below avoids that hazard entirely:
+ * `process.exit()` is itself `never`, so the function legitimately never
+ * returns and there is no event-loop tick between the write and the
+ * exit. Any partial flushing concerns are accepted as a trade-off — the
+ * exit code is the authoritative signal to Chrome regardless of whether
+ * the frame body made it across the pipe.
  */
 function writeFrameAndExit(payload: unknown, exitCode: number): never {
-  process.stdout.write(encodeFrame(payload), () => {
-    process.exit(exitCode);
-  });
-  // The callback above will fire on the next tick of the event loop and
-  // terminate the process. We block here forever so callers can rely on
-  // a `never` return type without us racing the callback.
-  return new Promise<never>(() => {}) as unknown as never;
+  try {
+    process.stdout.write(encodeFrame(payload));
+  } catch {
+    // ignore — exit code is the authoritative signal
+  }
+  process.exit(exitCode);
 }
 
 /**
@@ -163,18 +168,13 @@ function writeFrameAndExit(payload: unknown, exitCode: number): never {
  * (or a Chrome extension developer inspecting the host's stderr stream)
  * can see what went wrong.
  *
- * Uses `writeFrameAndExit` so the error frame is actually flushed to
- * Chrome before the process terminates.
+ * Uses `writeFrameAndExit` so the error frame is written to stdout before
+ * the process terminates. `writeFrameAndExit` handles its own write
+ * failures internally, so no additional try/catch is needed here.
  */
 function fail(message: string, code = 1): never {
   process.stderr.write(`vellum-chrome-native-host: ${message}\n`);
-  try {
-    return writeFrameAndExit({ type: "error", message }, code);
-  } catch {
-    // If even queuing the error frame fails (e.g. stdout already closed),
-    // there's nothing useful to do — just exit.
-    process.exit(code);
-  }
+  writeFrameAndExit({ type: "error", message }, code);
 }
 
 /**
@@ -229,8 +229,11 @@ async function requestToken(
     throw new Error("assistant pair response missing token / expiresAt");
   }
 
-  const { token, expiresAt } = body as TokenResponse;
-  return { token, expiresAt };
+  const { token, expiresAt, guardianId } = body as TokenResponse;
+  if (typeof guardianId !== "string" || guardianId.length === 0) {
+    throw new Error("pair endpoint response missing guardianId");
+  }
+  return { token, expiresAt, guardianId };
 }
 
 async function main(): Promise<void> {
@@ -254,12 +257,12 @@ async function main(): Promise<void> {
     process.stderr.write(
       `vellum-chrome-native-host: unauthorized_origin (got ${extensionOrigin ?? "<none>"})\n`,
     );
-    try {
-      writeFrameAndExit({ type: "error", message: "unauthorized_origin" }, 1);
-    } catch {
-      // Ignore — exit code is the authoritative signal here.
-      process.exit(1);
-    }
+    writeFrameAndExit({ type: "error", message: "unauthorized_origin" }, 1);
+    // Defense-in-depth: even though writeFrameAndExit calls process.exit
+    // synchronously and is typed `never`, an explicit `return` here
+    // guarantees we never fall through to the stdin listener setup below
+    // if a future refactor accidentally makes writeFrameAndExit async.
+    return;
   }
 
   // Reading stdin in 4-byte-framed chunks. Chrome may deliver a single
@@ -298,12 +301,12 @@ async function main(): Promise<void> {
         }
 
         try {
-          const { token, expiresAt } = await requestToken(
+          const { token, expiresAt, guardianId } = await requestToken(
             extensionOrigin!,
             process.argv,
           );
           writeFrameAndExit(
-            { type: "token_response", token, expiresAt },
+            { type: "token_response", token, expiresAt, guardianId },
             0,
           );
         } catch (err) {