Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions assistant/src/__tests__/background-shell-bash.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";

import type { WakeOptions } from "../runtime/agent-wake.js";
import type { BackgroundTool } from "../tools/background-tool-registry.js";
import type { Tool } from "../tools/types.js";

// ── Mock modules ────────────────────────────────────────────────────────────

Expand Down Expand Up @@ -88,6 +87,8 @@ mock.module("../tools/background-tool-registry.js", () => ({

// ── Imports (after mocks) ───────────────────────────────────────────────────

import { shellTool } from "../tools/terminal/shell.js";

const baseContext = {
workingDir: process.env.VELLUM_WORKSPACE_DIR ?? "/tmp",
conversationId: "conv-bg-test",
Expand Down Expand Up @@ -117,9 +118,7 @@ function waitForWake(
}

describe("bash tool background mode", () => {
let shellTool: Tool;

beforeEach(async () => {
beforeEach(() => {
mockWakeAgentForOpportunity.mockClear();
mockRegisterBackgroundTool.mockClear();
mockRemoveBackgroundTool.mockClear();
Expand All @@ -128,9 +127,6 @@ describe("bash tool background mode", () => {
mockIsBackgroundToolLimitReached.mockClear();
mockIsBackgroundToolLimitReached.mockReturnValue(false);
registeredTools.length = 0;

const mod = await import("../tools/terminal/shell.js");
shellTool = mod.shellTool;
});

afterEach(() => {
Expand Down
4 changes: 2 additions & 2 deletions assistant/src/__tests__/computer-use-tools.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ interface JsonSchema {
}

/** Cast a tool definition's input_schema to a usable JSON Schema shape. */
function schema(tool: { input_schema: object }): JsonSchema {
function schema(tool: { input_schema?: object }): JsonSchema {
return tool.input_schema as JsonSchema;
}

Expand Down Expand Up @@ -53,7 +53,7 @@ describe("computer-use tool definitions", () => {

test("all tools have descriptions", () => {
for (const tool of allComputerUseTools) {
expect(tool.description.length).toBeGreaterThan(0);
expect(tool.description!.length).toBeGreaterThan(0);
}
});
});
Expand Down
3 changes: 1 addition & 2 deletions assistant/src/__tests__/credential-execution-tools.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import { makeAuthenticatedRequestTool } from "../tools/credential-execution/make
import { manageSecureCommandTool } from "../tools/credential-execution/manage-secure-command-tool.js";
import { runAuthenticatedCommandTool } from "../tools/credential-execution/run-authenticated-command.js";
import { cesTools, getCesToolsIfEnabled } from "../tools/tool-manifest.js";
import type { Tool } from "../tools/types.js";

// ---------------------------------------------------------------------------
// Schema shape tests
Expand Down Expand Up @@ -79,7 +78,7 @@ describe("CES tool schema shapes", () => {
describe("CES tool manifest registration", () => {
test("cesTools contains exactly three CES tools", () => {
expect(cesTools).toHaveLength(3);
const names = cesTools.map((t: Tool) => t.name);
const names = cesTools.map((t) => t.name);
expect(names).toContain("make_authenticated_request");
expect(names).toContain("run_authenticated_command");
expect(names).toContain("manage_secure_command_tool");
Expand Down
6 changes: 4 additions & 2 deletions assistant/src/__tests__/host-file-edit-tool.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ let mockFileProxyRequestFn: (
input: HostFileInput,
conversationId: string,
signal?: AbortSignal,
) => Promise<ToolExecutionResult> = () => Promise.resolve({ content: "", isError: false });
) => Promise<ToolExecutionResult> = () =>
Promise.resolve({ content: "", isError: false });

mock.module("../daemon/host-file-proxy.js", () => ({
HostFileProxy: {
Expand Down Expand Up @@ -43,7 +44,8 @@ afterEach(() => {
rmSync(dir, { recursive: true, force: true });
}
mockFileProxyAvailable = false;
mockFileProxyRequestFn = () => Promise.resolve({ content: "", isError: false });
mockFileProxyRequestFn = () =>
Promise.resolve({ content: "", isError: false });
});

describe("host_file_edit tool", () => {
Expand Down
6 changes: 4 additions & 2 deletions assistant/src/__tests__/host-file-read-tool.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ let mockFileProxyRequestFn: (
input: HostFileInput,
conversationId: string,
signal?: AbortSignal,
) => Promise<ToolExecutionResult> = () => Promise.resolve({ content: "", isError: false });
) => Promise<ToolExecutionResult> = () =>
Promise.resolve({ content: "", isError: false });

mock.module("../daemon/host-file-proxy.js", () => ({
HostFileProxy: {
Expand Down Expand Up @@ -49,7 +50,8 @@ afterEach(() => {
rmSync(dir, { recursive: true, force: true });
}
mockFileProxyAvailable = false;
mockFileProxyRequestFn = () => Promise.resolve({ content: "", isError: false });
mockFileProxyRequestFn = () =>
Promise.resolve({ content: "", isError: false });
});

// Minimal valid JPEG: FF D8 FF E0 header
Expand Down
12 changes: 9 additions & 3 deletions assistant/src/__tests__/host-file-write-tool.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ let mockFileProxyRequestFn: (
input: HostFileInput,
conversationId: string,
signal?: AbortSignal,
) => Promise<ToolExecutionResult> = () => Promise.resolve({ content: "", isError: false });
) => Promise<ToolExecutionResult> = () =>
Promise.resolve({ content: "", isError: false });

mock.module("../daemon/host-file-proxy.js", () => ({
HostFileProxy: {
Expand Down Expand Up @@ -43,7 +44,8 @@ afterEach(() => {
rmSync(dir, { recursive: true, force: true });
}
mockFileProxyAvailable = false;
mockFileProxyRequestFn = () => Promise.resolve({ content: "", isError: false });
mockFileProxyRequestFn = () =>
Promise.resolve({ content: "", isError: false });
});

describe("host_file_write tool", () => {
Expand Down Expand Up @@ -202,7 +204,11 @@ describe("host_file_write tool", () => {
};

await hostFileWriteTool.execute(
{ path: "/host/output.txt", content: "hello", target_client_id: "client-x" },
{
path: "/host/output.txt",
content: "hello",
target_client_id: "client-x",
},
makeContext(),
);

Expand Down
13 changes: 9 additions & 4 deletions assistant/src/__tests__/host-shell-tool.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,13 @@ mock.module("../util/logger.js", () => ({
// Mock the host-bash-proxy singleton so proxy delegation tests can control it.
let mockProxyAvailable = false;
let mockProxyRequestFn: (
input: { command: string; working_dir?: string; timeout_seconds?: number; env?: Record<string, string>; targetClientId?: string },
input: {
command: string;
working_dir?: string;
timeout_seconds?: number;
env?: Record<string, string>;
targetClientId?: string;
},
conversationId: string,
signal?: AbortSignal,
) => Promise<ToolExecutionResult> = () =>
Expand Down Expand Up @@ -863,9 +869,8 @@ describe("host_bash — proxy delegation", () => {

test("propagates VELLUM_UNTRUSTED_SHELL env to proxy under CES lockdown", async () => {
// Enable CES shell lockdown via the override cache
const { setOverridesForTesting } = await import(
"./feature-flag-test-helpers.js"
);
const { setOverridesForTesting } =
await import("./feature-flag-test-helpers.js");
setOverridesForTesting({
"ces-shell-lockdown": true,
});
Expand Down
9 changes: 1 addition & 8 deletions assistant/src/__tests__/managed-skill-lifecycle.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ import {
seedV2SkillEntries,
} from "../memory/v2/skill-store.js";
import { executeDeleteManagedSkill } from "../tools/skills/delete-managed.js";
import { SkillLoadTool } from "../tools/skills/load.js";
import { skillLoadTool } from "../tools/skills/load.js";
import { executeScaffoldManagedSkill } from "../tools/skills/scaffold-managed.js";
import type { ToolContext } from "../tools/types.js";

Expand Down Expand Up @@ -136,9 +136,6 @@ Run the custom lifecycle verification procedure.
expect(catalogSkill!.source).toBe("managed");
expect(catalogSkill!.displayName).toBe("E2E Custom Skill");

const skillLoadTool = new (SkillLoadTool as any)() as InstanceType<
typeof SkillLoadTool
>;
const loadResult = await skillLoadTool.execute(
{ skill: skillId },
makeContext(),
Expand Down Expand Up @@ -280,10 +277,6 @@ Run the custom lifecycle verification procedure.
test("scaffold → skill_load chain: literal tool execution", async () => {
const ctx = makeContext();

const skillLoadTool = new (SkillLoadTool as any)() as InstanceType<
typeof SkillLoadTool
>;

// Step 1: Scaffold a skill directly
const scaffoldResult = await executeScaffoldManagedSkill(
{
Expand Down
12 changes: 9 additions & 3 deletions assistant/src/__tests__/shell-observability.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";

import type { WakeOptions } from "../runtime/agent-wake.js";
import type { BackgroundTool } from "../tools/background-tool-registry.js";
import type { Tool } from "../tools/types.js";

// ── Mock modules ────────────────────────────────────────────────────────────

Expand Down Expand Up @@ -113,6 +112,15 @@ mock.module("../tools/background-tool-registry.js", () => ({

// ── Imports (after mocks) ───────────────────────────────────────────────────

// `shellTool` is imported dynamically inside `beforeEach` so that the logger
// mock above is registered before shell.ts evaluates and captures its
// `getLogger` reference. A static import would hoist past `mock.module()`,
// and the test would then see the real pino logger instead of the in-memory
// `logCalls` array. The type below mirrors the `satisfies`-narrowed export,
// so `shellTool.execute(...)` keeps its required-`execute` typing without
// needing a non-null assertion (`!`).
let shellTool: (typeof import("../tools/terminal/shell.js"))["shellTool"];

const baseContext = {
workingDir: process.env.VELLUM_WORKSPACE_DIR ?? "/tmp",
conversationId: "conv-obs-test",
Expand Down Expand Up @@ -159,8 +167,6 @@ const isKill = (reason: string) => (c: LogCall) =>
c.fields.reason === reason;

describe("shell observability logs", () => {
let shellTool: Tool;

beforeEach(async () => {
logCalls.length = 0;
registeredTools.length = 0;
Expand Down
27 changes: 3 additions & 24 deletions assistant/src/__tests__/terminal-tools.test.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import { existsSync, readFileSync } from "node:fs";
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";

import type { ShellOutputResult } from "../tools/shared/shell-output.js";
import type { Tool } from "../tools/types.js";
import { afterEach, describe, expect, mock, test } from "bun:test";

// ── Mock modules ────────────────────────────────────────────────────────────

Expand Down Expand Up @@ -64,13 +61,15 @@ mock.module("../tools/network/script-proxy/index.js", () => ({

// ── Imports (after mocks) ───────────────────────────────────────────────────

import { formatShellOutput } from "../tools/shared/shell-output.js";
import {
ALWAYS_INJECTED_ENV_VARS,
buildSanitizedEnv,
KATA_INJECTED_ENV_VARS,
KATA_SAFE_ENV_VARS,
SAFE_ENV_VARS,
} from "../tools/terminal/safe-env.js";
import { shellTool } from "../tools/terminal/shell.js";

// ═══════════════════════════════════════════════════════════════════════════
// Safe Environment — buildSanitizedEnv()
Expand Down Expand Up @@ -210,13 +209,6 @@ describe("buildSanitizedEnv", () => {
// ═══════════════════════════════════════════════════════════════════════════

describe("Shell tool input validation", () => {
let shellTool: Tool;

beforeEach(async () => {
const mod = await import("../tools/terminal/shell.js");
shellTool = mod.shellTool;
});

const baseContext = {
workingDir: testTmpDir,
conversationId: "test-conv-1",
Expand Down Expand Up @@ -308,19 +300,6 @@ describe("Shell tool input validation", () => {
// ═══════════════════════════════════════════════════════════════════════════

describe("formatShellOutput", () => {
let formatShellOutput: (
stdout: string,
stderr: string,
code: number | null,
timedOut: boolean,
timeoutSec: number,
) => ShellOutputResult;

beforeEach(async () => {
const mod = await import("../tools/shared/shell-output.js");
formatShellOutput = mod.formatShellOutput;
});

test("successful command with output", () => {
const result = formatShellOutput("hello world", "", 0, false, 120);
expect(result.content).toBe("hello world");
Expand Down
46 changes: 28 additions & 18 deletions assistant/src/ipc/skill-routes/__tests__/registries.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import {
getTool,
getToolOwner,
} from "../../../tools/registry.js";
import { RiskLevel } from "../../../tools/types.js";
import {
__getActiveSessionCountForTesting,
__resetActiveSessionsForTesting,
Expand Down Expand Up @@ -85,7 +86,7 @@ describe("host.registries.register_tools", () => {
});
});

test("proxy execute throws when no supervisor is attached", async () => {
test("proxy execute surfaces an error result when no supervisor is attached", async () => {
await registerToolsRoute.handler({
skillId: "stub-skill",
tools: [
Expand All @@ -101,16 +102,19 @@ describe("host.registries.register_tools", () => {

const installed = getTool("skill_stub_tool");
expect(installed).toBeDefined();
await expect(
installed!.execute(
{},
{
workingDir: "/tmp",
conversationId: "c",
trustClass: "guardian",
},
),
).rejects.toThrow(/requires an attached MeetHostSupervisor/i);
// Skill tools arrive without an `execute` closure (closures don't cross
// IPC). `finalizeTool` synthesizes a fallback `execute` that resolves with
// an error result instead of throwing, so unsupervised invocations surface
// a clear "not wired up" signal to the model.
const result = await installed!.execute(
{},
{
workingDir: "/tmp",
conversationId: "c",
trustClass: "guardian",
},
);
expect(result.isError).toBe(true);
expect(result.content).toMatch(/no execute implementation/i);
});

test("rejects empty tool list", async () => {
Expand All @@ -119,13 +123,19 @@ describe("host.registries.register_tools", () => {
).rejects.toThrow();
});

test("rejects missing required fields", async () => {
await expect(
registerToolsRoute.handler({
skillId: "any-skill",
tools: [{ name: "missing_rest" }],
}),
).rejects.toThrow();
test("fills defaults for partial tool entries", async () => {
// The wire and author sides share one schema (`ToolDefinitionSchema`,
// all-optional), and the daemon runs `finalizeTool` on every incoming
// tool. A partial entry is therefore accepted rather than rejected:
// defaults fill in for the missing fields.
const result = (await registerToolsRoute.handler({
skillId: "partial-skill",
tools: [{ name: "partial_tool" }],
})) as { registered: string[] };
expect(result.registered).toEqual(["partial_tool"]);
const installed = getTool("partial_tool");
expect(installed).toBeDefined();
expect(installed!.defaultRiskLevel).toBe(RiskLevel.Medium);
expect(installed!.executionTarget).toBe("sandbox");
});

test("rejects missing skillId", async () => {
Expand Down
Loading
Loading