Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 85 additions & 40 deletions assistant/src/daemon/__tests__/conversation-tool-setup.test.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
/**
* Tests for `isToolActiveForContext` host-tool capability gating.
*
* The chrome-extension interface only supports the `host_browser` host proxy
* capability, so host tools must be gated by
* `supportsHostProxy(transport, capability)` instead of a single
* `hasNoClient` check. These tests assert that:
* Two scenarios are verified:
* - chrome-extension is its own executor and is exempt from the hasNoClient
* gate (the extension's own popup UI gates commands; there is no SSE
* interactive approval channel, and chrome-extension turns intentionally
* run with `hasNoClient: true` because chrome-extension is not in
* `INTERACTIVE_INTERFACES`).
* - macos still requires a connected SSE client for interactive approval, so
* `hasNoClient: true` continues to deny all host tools on macos.
*
* - Each host tool is only projected for transports whose interface supports
* the matching capability (e.g. `host_bash` is only active for macOS, not
* chrome-extension — regression coverage for the Codex P1 host-tool leak).
* - The `hasNoClient` precondition still takes precedence so HTTP-only paths
* never see host tools.
* - Contexts without a `transportInterface` fall back to the permissive
* coarse-grained behavior so callers that have not yet plumbed the field
* through `SkillProjectionContext` continue to see the host tools their
* `hasNoClient` state allows.
* The per-capability check (`supportsHostProxy(transport, capability)`) runs
* first and is authoritative for structural support, so host_bash/host_file_*
* remain filtered out for chrome-extension (Codex P1 leak stays plugged).
*/

import { describe, expect, test } from "bun:test";
Expand All @@ -38,7 +36,8 @@ function makeCtx(
}

describe("isToolActiveForContext — host tool capability gating", () => {
test("host_bash is active for macOS transport (full host proxy support)", () => {
// macOS transport: SSE-based interactive approval required.
test("host_bash is active for macOS with a connected client", () => {
expect(
isToolActiveForContext(
"host_bash",
Expand All @@ -47,76 +46,122 @@ describe("isToolActiveForContext — host tool capability gating", () => {
).toBe(true);
});

test("host_bash is NOT active for chrome-extension transport (Codex P1 regression)", () => {
// Regression coverage: chrome-extension only supports `host_browser`, so
// `host_bash` must NOT be projected even though a client is connected
// (`hasNoClient: false`). Without per-capability gating the model could
// attempt host_bash calls that the transport cannot dispatch.
test("host_bash is NOT active for macOS when hasNoClient is true (security invariant)", () => {
// macOS uses an SSE-based interactive approval channel. Without a
// connected client the guardian auto-approve path could execute host
// commands unattended, so host tools must be denied.
expect(
isToolActiveForContext(
"host_bash",
makeCtx({
hasNoClient: false,
transportInterface: "chrome-extension",
}),
makeCtx({ hasNoClient: true, transportInterface: "macos" }),
),
).toBe(false);
});

test("host_file_read is NOT active for macOS when hasNoClient is true", () => {
expect(
isToolActiveForContext(
"host_file_read",
makeCtx({ hasNoClient: true, transportInterface: "macos" }),
),
).toBe(false);
});

test("host_browser is active for chrome-extension transport", () => {
test("host_browser is active for macOS with a connected client", () => {
expect(
isToolActiveForContext(
"host_browser",
makeCtx({ hasNoClient: false, transportInterface: "macos" }),
),
).toBe(true);
});

test("host_browser is NOT active for macOS when hasNoClient is true", () => {
// macOS requires a client for any host tool — the SSE interactive
// approval channel must be available regardless of capability.
expect(
isToolActiveForContext(
"host_browser",
makeCtx({ hasNoClient: true, transportInterface: "macos" }),
),
).toBe(false);
});

// chrome-extension transport: the extension is its own executor.
test("host_browser is active for chrome-extension when hasNoClient is true (regression fix)", () => {
// Regression coverage for PR #24195: the per-capability tool gate was
// previously placed inside the `hasNoClient` short-circuit, which
Comment on lines +92 to +93

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Remove historical narrative from test comment

assistant/AGENTS.md explicitly says updated comments must not reference removed behavior, but this new comment says the gate was "previously placed" elsewhere and cites an old PR. That violates the project’s comment policy and makes maintenance harder because tests should describe current invariants, not implementation history; please rewrite this as a present-tense behavior assertion only.

Useful? React with 👍 / 👎.

// filtered `host_browser` out for chrome-extension turns (which run
// with `hasNoClient: true` by design). The extension is its own
// executor and gates commands via its own popup UI, so it must be
// exempt from the hasNoClient gate.
expect(
isToolActiveForContext(
"host_browser",
makeCtx({
hasNoClient: false,
hasNoClient: true,
transportInterface: "chrome-extension",
}),
),
).toBe(true);
});

test("host_browser is active for macOS transport", () => {
test("host_browser is active for chrome-extension when hasNoClient is false", () => {
expect(
isToolActiveForContext(
"host_browser",
makeCtx({ hasNoClient: false, transportInterface: "macos" }),
makeCtx({
hasNoClient: false,
transportInterface: "chrome-extension",
}),
),
).toBe(true);
});

test("host_file_read is NOT active for chrome-extension transport", () => {
test("host_bash is NOT active for chrome-extension even when hasNoClient is true (Codex P1 leak)", () => {
// The per-capability check runs first and is authoritative: chrome-extension
// only supports `host_browser`, so `host_bash` must remain filtered out.
expect(
isToolActiveForContext(
"host_file_read",
"host_bash",
makeCtx({
hasNoClient: false,
hasNoClient: true,
transportInterface: "chrome-extension",
}),
),
).toBe(false);
});

test("host_bash respects hasNoClient even when transport supports it", () => {
// The existing `hasNoClient` gate must continue to take precedence: even
// a macOS-capable transport must not surface host tools when no client is
// actually connected (e.g. the HTTP-only path).
test("host_file_read is NOT active for chrome-extension when hasNoClient is true", () => {
expect(
isToolActiveForContext(
"host_bash",
makeCtx({ hasNoClient: true, transportInterface: "macos" }),
"host_file_read",
makeCtx({
hasNoClient: true,
transportInterface: "chrome-extension",
}),
),
).toBe(false);
});

test("host_bash falls back to permissive behavior when transport is undefined", () => {
// Backwards-compat fallback: contexts that don't pass a transport
// interface (e.g. tests, callers that haven't plumbed the new field)
// keep the coarse-grained behavior so we don't accidentally hide tools.
// Backwards-compat fallback: no transport plumbed through.
test("host_bash falls back to hasNoClient gate when transport is undefined (client connected)", () => {
// Without a transport interface we cannot run the per-capability check,
// so we fall back to the coarse-grained `hasNoClient` behavior.
expect(
isToolActiveForContext(
"host_bash",
makeCtx({ hasNoClient: false, transportInterface: undefined }),
),
).toBe(true);
});

test("host_bash falls back to hasNoClient gate when transport is undefined (no client)", () => {
expect(
isToolActiveForContext(
"host_bash",
makeCtx({ hasNoClient: true, transportInterface: undefined }),
),
).toBe(false);
});
});
36 changes: 20 additions & 16 deletions assistant/src/daemon/conversation-tool-setup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -531,24 +531,28 @@ export function isToolActiveForContext(
return ctx.channelCapabilities?.supportsDynamicUi ?? !ctx.hasNoClient;
}
if (HOST_TOOL_NAMES.has(name)) {
// Host tools require a connected client — without one, there is no human
// to approve execution and the guardian auto-approve path would allow
// unchecked host command execution on the daemon host.
if (ctx.hasNoClient) return false;
// Then gate per-capability against the connected interface so that
// partial-capability transports (e.g. chrome-extension only supports
// `host_browser`) only see host tools their interface can service.
// Without this check, host_bash/host_file_* would surface for
// chrome-extension sessions and the model would attempt calls the
// transport cannot dispatch.
const capability = HOST_TOOL_TO_CAPABILITY.get(name);
const transport = ctx.transportInterface;
// Backwards-compat fallback: contexts that have not yet been plumbed
// with a transport interface (tests, callers that don't pass the new
// field) keep the coarse-grained behavior so we don't accidentally hide
// tools from them.
if (!transport || !capability) return true;
return supportsHostProxy(transport, capability);

// Per-capability check is authoritative for structural support: if the
// transport cannot service this capability, the tool is filtered out.
if (transport && capability && !supportsHostProxy(transport, capability)) {
return false;
}

// chrome-extension is its own executor — the extension's popup gates
// commands via its own UI, and the transport does not use an SSE-level
// interactive approval channel. hasNoClient is intentionally `true` for
// chrome-extension turns (chrome-extension is not in INTERACTIVE_INTERFACES)
// and must not gate host_browser. Trust the per-capability check.
if (transport === "chrome-extension") {
return true;
}

// For transports that surface approvals over SSE (macos, backwards-compat
// fallback), deny when no client is present so the guardian auto-approve
// path cannot execute host commands unattended.
return !ctx.hasNoClient;
}
if (CLIENT_CAPABILITY_TOOL_NAMES.has(name)) {
return !ctx.hasNoClient;
Expand Down
Loading