vellum-ai · dvargasfuertes · May 30, 2026 · May 29, 2026 · May 29, 2026 · May 29, 2026
diff --git a/assistant/src/__tests__/background-shell-bash.test.ts b/assistant/src/__tests__/background-shell-bash.test.ts
@@ -2,7 +2,6 @@ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
 
 import type { WakeOptions } from "../runtime/agent-wake.js";
 import type { BackgroundTool } from "../tools/background-tool-registry.js";
-import type { Tool } from "../tools/types.js";
 
 // ── Mock modules ────────────────────────────────────────────────────────────
 
@@ -88,6 +87,8 @@ mock.module("../tools/background-tool-registry.js", () => ({
 
 // ── Imports (after mocks) ───────────────────────────────────────────────────
 
+import { shellTool } from "../tools/terminal/shell.js";
+
 const baseContext = {
   workingDir: process.env.VELLUM_WORKSPACE_DIR ?? "/tmp",
   conversationId: "conv-bg-test",
@@ -117,9 +118,7 @@ function waitForWake(
 }
 
 describe("bash tool background mode", () => {
-  let shellTool: Tool;
-
-  beforeEach(async () => {
+  beforeEach(() => {
     mockWakeAgentForOpportunity.mockClear();
     mockRegisterBackgroundTool.mockClear();
     mockRemoveBackgroundTool.mockClear();
@@ -128,9 +127,6 @@ describe("bash tool background mode", () => {
     mockIsBackgroundToolLimitReached.mockClear();
     mockIsBackgroundToolLimitReached.mockReturnValue(false);
     registeredTools.length = 0;
-
-    const mod = await import("../tools/terminal/shell.js");
-    shellTool = mod.shellTool;
   });
 
   afterEach(() => {

diff --git a/assistant/src/__tests__/computer-use-tools.test.ts b/assistant/src/__tests__/computer-use-tools.test.ts
@@ -23,7 +23,15 @@ interface JsonSchema {
 }
 
-/** Cast a tool definition's input_schema to a usable JSON Schema shape. */
-function schema(tool: { input_schema: object }): JsonSchema {
+/**
+ * Cast a tool definition's input_schema to a usable JSON Schema shape.
+ *
+ * `input_schema` is optional on the accepted type, so fail loudly here
+ * rather than hand callers `undefined` typed as a schema.
+ */
+function schema(tool: { input_schema?: object }): JsonSchema {
+  if (tool.input_schema === undefined) {
+    throw new Error("tool definition is missing input_schema");
+  }
   return tool.input_schema as JsonSchema;
 }
 
@@ -53,7 +61,9 @@ describe("computer-use tool definitions", () => {
 
   test("all tools have descriptions", () => {
     for (const tool of allComputerUseTools) {
-      expect(tool.description.length).toBeGreaterThan(0);
+      // `description` may be undefined on the tool type; treat a missing one
+      // as empty so it fails this assertion instead of throwing a TypeError.
+      expect(tool.description?.length ?? 0).toBeGreaterThan(0);
     }
   });
 });

diff --git a/assistant/src/__tests__/credential-execution-tools.test.ts b/assistant/src/__tests__/credential-execution-tools.test.ts
@@ -5,7 +5,6 @@ import { makeAuthenticatedRequestTool } from "../tools/credential-execution/make
 import { manageSecureCommandTool } from "../tools/credential-execution/manage-secure-command-tool.js";
 import { runAuthenticatedCommandTool } from "../tools/credential-execution/run-authenticated-command.js";
 import { cesTools, getCesToolsIfEnabled } from "../tools/tool-manifest.js";
-import type { Tool } from "../tools/types.js";
 
 // ---------------------------------------------------------------------------
 // Schema shape tests
@@ -79,7 +78,7 @@ describe("CES tool schema shapes", () => {
 describe("CES tool manifest registration", () => {
   test("cesTools contains exactly three CES tools", () => {
     expect(cesTools).toHaveLength(3);
-    const names = cesTools.map((t: Tool) => t.name);
+    const names = cesTools.map((t) => t.name);
     expect(names).toContain("make_authenticated_request");
     expect(names).toContain("run_authenticated_command");
     expect(names).toContain("manage_secure_command_tool");

diff --git a/assistant/src/__tests__/host-file-edit-tool.test.ts b/assistant/src/__tests__/host-file-edit-tool.test.ts
@@ -12,7 +12,8 @@ let mockFileProxyRequestFn: (
   input: HostFileInput,
   conversationId: string,
   signal?: AbortSignal,
-) => Promise<ToolExecutionResult> = () => Promise.resolve({ content: "", isError: false });
+) => Promise<ToolExecutionResult> = () =>
+  Promise.resolve({ content: "", isError: false });
 
 mock.module("../daemon/host-file-proxy.js", () => ({
   HostFileProxy: {
@@ -43,7 +44,8 @@ afterEach(() => {
     rmSync(dir, { recursive: true, force: true });
   }
   mockFileProxyAvailable = false;
-  mockFileProxyRequestFn = () => Promise.resolve({ content: "", isError: false });
+  mockFileProxyRequestFn = () =>
+    Promise.resolve({ content: "", isError: false });
 });
 
 describe("host_file_edit tool", () => {

diff --git a/assistant/src/__tests__/host-file-read-tool.test.ts b/assistant/src/__tests__/host-file-read-tool.test.ts
@@ -12,7 +12,8 @@ let mockFileProxyRequestFn: (
   input: HostFileInput,
   conversationId: string,
   signal?: AbortSignal,
-) => Promise<ToolExecutionResult> = () => Promise.resolve({ content: "", isError: false });
+) => Promise<ToolExecutionResult> = () =>
+  Promise.resolve({ content: "", isError: false });
 
 mock.module("../daemon/host-file-proxy.js", () => ({
   HostFileProxy: {
@@ -49,7 +50,8 @@ afterEach(() => {
     rmSync(dir, { recursive: true, force: true });
   }
   mockFileProxyAvailable = false;
-  mockFileProxyRequestFn = () => Promise.resolve({ content: "", isError: false });
+  mockFileProxyRequestFn = () =>
+    Promise.resolve({ content: "", isError: false });
 });
 
 // Minimal valid JPEG: FF D8 FF E0 header

diff --git a/assistant/src/__tests__/host-file-write-tool.test.ts b/assistant/src/__tests__/host-file-write-tool.test.ts
@@ -12,7 +12,8 @@ let mockFileProxyRequestFn: (
   input: HostFileInput,
   conversationId: string,
   signal?: AbortSignal,
-) => Promise<ToolExecutionResult> = () => Promise.resolve({ content: "", isError: false });
+) => Promise<ToolExecutionResult> = () =>
+  Promise.resolve({ content: "", isError: false });
 
 mock.module("../daemon/host-file-proxy.js", () => ({
   HostFileProxy: {
@@ -43,7 +44,8 @@ afterEach(() => {
     rmSync(dir, { recursive: true, force: true });
   }
   mockFileProxyAvailable = false;
-  mockFileProxyRequestFn = () => Promise.resolve({ content: "", isError: false });
+  mockFileProxyRequestFn = () =>
+    Promise.resolve({ content: "", isError: false });
 });
 
 describe("host_file_write tool", () => {
@@ -202,7 +204,11 @@ describe("host_file_write tool", () => {
     };
 
     await hostFileWriteTool.execute(
-      { path: "/host/output.txt", content: "hello", target_client_id: "client-x" },
+      {
+        path: "/host/output.txt",
+        content: "hello",
+        target_client_id: "client-x",
+      },
       makeContext(),
     );
 

diff --git a/assistant/src/__tests__/host-shell-tool.test.ts b/assistant/src/__tests__/host-shell-tool.test.ts
@@ -56,7 +56,13 @@ mock.module("../util/logger.js", () => ({
 // Mock the host-bash-proxy singleton so proxy delegation tests can control it.
 let mockProxyAvailable = false;
 let mockProxyRequestFn: (
-  input: { command: string; working_dir?: string; timeout_seconds?: number; env?: Record<string, string>; targetClientId?: string },
+  input: {
+    command: string;
+    working_dir?: string;
+    timeout_seconds?: number;
+    env?: Record<string, string>;
+    targetClientId?: string;
+  },
   conversationId: string,
   signal?: AbortSignal,
 ) => Promise<ToolExecutionResult> = () =>
@@ -863,9 +869,8 @@ describe("host_bash — proxy delegation", () => {
 
   test("propagates VELLUM_UNTRUSTED_SHELL env to proxy under CES lockdown", async () => {
     // Enable CES shell lockdown via the override cache
-    const { setOverridesForTesting } = await import(
-  "./feature-flag-test-helpers.js"
-);
+    const { setOverridesForTesting } =
+      await import("./feature-flag-test-helpers.js");
     setOverridesForTesting({
       "ces-shell-lockdown": true,
     });

diff --git a/assistant/src/__tests__/managed-skill-lifecycle.test.ts b/assistant/src/__tests__/managed-skill-lifecycle.test.ts
@@ -87,7 +87,7 @@ import {
   seedV2SkillEntries,
 } from "../memory/v2/skill-store.js";
 import { executeDeleteManagedSkill } from "../tools/skills/delete-managed.js";
-import { SkillLoadTool } from "../tools/skills/load.js";
+import { skillLoadTool } from "../tools/skills/load.js";
 import { executeScaffoldManagedSkill } from "../tools/skills/scaffold-managed.js";
 import type { ToolContext } from "../tools/types.js";
 
@@ -136,9 +136,6 @@ Run the custom lifecycle verification procedure.
     expect(catalogSkill!.source).toBe("managed");
     expect(catalogSkill!.displayName).toBe("E2E Custom Skill");
 
-    const skillLoadTool = new (SkillLoadTool as any)() as InstanceType<
-      typeof SkillLoadTool
-    >;
     const loadResult = await skillLoadTool.execute(
       { skill: skillId },
       makeContext(),
@@ -280,10 +277,6 @@ Run the custom lifecycle verification procedure.
   test("scaffold → skill_load chain: literal tool execution", async () => {
     const ctx = makeContext();
 
-    const skillLoadTool = new (SkillLoadTool as any)() as InstanceType<
-      typeof SkillLoadTool
-    >;
-
     // Step 1: Scaffold a skill directly
     const scaffoldResult = await executeScaffoldManagedSkill(
       {

diff --git a/assistant/src/__tests__/shell-observability.test.ts b/assistant/src/__tests__/shell-observability.test.ts
@@ -16,7 +16,6 @@ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
 
 import type { WakeOptions } from "../runtime/agent-wake.js";
 import type { BackgroundTool } from "../tools/background-tool-registry.js";
-import type { Tool } from "../tools/types.js";
 
 // ── Mock modules ────────────────────────────────────────────────────────────
 
@@ -113,6 +112,15 @@ mock.module("../tools/background-tool-registry.js", () => ({
 
 // ── Imports (after mocks) ───────────────────────────────────────────────────
 
+// `shellTool` is imported dynamically inside `beforeEach` so that the
+// logger mock above is registered before shell.ts is evaluated and
+// captures its `getLogger` reference. A static import would be hoisted
+// above `mock.module()`, and the test would see the real pino logger
+// instead of the in-memory `logCalls` array. The declared type is taken
+// from the module's `satisfies`-narrowed export, so `execute` stays a
+// required member and call sites need no non-null assertion (`!`).
+let shellTool: (typeof import("../tools/terminal/shell.js"))["shellTool"];
+
 const baseContext = {
   workingDir: process.env.VELLUM_WORKSPACE_DIR ?? "/tmp",
   conversationId: "conv-obs-test",
@@ -159,8 +167,6 @@ const isKill = (reason: string) => (c: LogCall) =>
   c.fields.reason === reason;
 
 describe("shell observability logs", () => {
-  let shellTool: Tool;
-
   beforeEach(async () => {
     logCalls.length = 0;
     registeredTools.length = 0;

diff --git a/assistant/src/__tests__/terminal-tools.test.ts b/assistant/src/__tests__/terminal-tools.test.ts
@@ -1,8 +1,5 @@
 import { existsSync, readFileSync } from "node:fs";
-import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
-
-import type { ShellOutputResult } from "../tools/shared/shell-output.js";
-import type { Tool } from "../tools/types.js";
+import { afterEach, describe, expect, mock, test } from "bun:test";
 
 // ── Mock modules ────────────────────────────────────────────────────────────
 
@@ -64,13 +61,15 @@ mock.module("../tools/network/script-proxy/index.js", () => ({
 
 // ── Imports (after mocks) ───────────────────────────────────────────────────
 
+import { formatShellOutput } from "../tools/shared/shell-output.js";
 import {
   ALWAYS_INJECTED_ENV_VARS,
   buildSanitizedEnv,
   KATA_INJECTED_ENV_VARS,
   KATA_SAFE_ENV_VARS,
   SAFE_ENV_VARS,
 } from "../tools/terminal/safe-env.js";
+import { shellTool } from "../tools/terminal/shell.js";
 
 // ═══════════════════════════════════════════════════════════════════════════
 //  Safe Environment — buildSanitizedEnv()
@@ -210,13 +209,6 @@ describe("buildSanitizedEnv", () => {
 // ═══════════════════════════════════════════════════════════════════════════
 
 describe("Shell tool input validation", () => {
-  let shellTool: Tool;
-
-  beforeEach(async () => {
-    const mod = await import("../tools/terminal/shell.js");
-    shellTool = mod.shellTool;
-  });
-
   const baseContext = {
     workingDir: testTmpDir,
     conversationId: "test-conv-1",
@@ -308,19 +300,6 @@ describe("Shell tool input validation", () => {
 // ═══════════════════════════════════════════════════════════════════════════
 
 describe("formatShellOutput", () => {
-  let formatShellOutput: (
-    stdout: string,
-    stderr: string,
-    code: number | null,
-    timedOut: boolean,
-    timeoutSec: number,
-  ) => ShellOutputResult;
-
-  beforeEach(async () => {
-    const mod = await import("../tools/shared/shell-output.js");
-    formatShellOutput = mod.formatShellOutput;
-  });
-
   test("successful command with output", () => {
     const result = formatShellOutput("hello world", "", 0, false, 120);
     expect(result.content).toBe("hello world");

diff --git a/assistant/src/ipc/skill-routes/__tests__/registries.test.ts b/assistant/src/ipc/skill-routes/__tests__/registries.test.ts
@@ -21,6 +21,7 @@ import {
   getTool,
   getToolOwner,
 } from "../../../tools/registry.js";
+import { RiskLevel } from "../../../tools/types.js";
 import {
   __getActiveSessionCountForTesting,
   __resetActiveSessionsForTesting,
@@ -85,7 +86,7 @@ describe("host.registries.register_tools", () => {
     });
   });
 
-  test("proxy execute throws when no supervisor is attached", async () => {
+  test("proxy execute surfaces an error result when no supervisor is attached", async () => {
     await registerToolsRoute.handler({
       skillId: "stub-skill",
       tools: [
@@ -101,16 +102,19 @@ describe("host.registries.register_tools", () => {
 
     const installed = getTool("skill_stub_tool");
     expect(installed).toBeDefined();
-    await expect(
-      installed!.execute(
-        {},
-        {
-          workingDir: "/tmp",
-          conversationId: "c",
-          trustClass: "guardian",
-        },
-      ),
-    ).rejects.toThrow(/requires an attached MeetHostSupervisor/i);
+    // Skill tools arrive without an `execute` closure (closures don't cross
+    // IPC). `finalizeTool` synthesizes a no-op error result so unsupervised
+    // invocations surface a clear "not wired up" signal to the model.
+    const result = await installed!.execute(
+      {},
+      {
+        workingDir: "/tmp",
+        conversationId: "c",
+        trustClass: "guardian",
+      },
+    );
+    expect(result.isError).toBe(true);
+    expect(result.content).toMatch(/no execute implementation/i);
   });
 
   test("rejects empty tool list", async () => {
@@ -119,13 +123,19 @@ describe("host.registries.register_tools", () => {
     ).rejects.toThrow();
   });
 
-  test("rejects missing required fields", async () => {
-    await expect(
-      registerToolsRoute.handler({
-        skillId: "any-skill",
-        tools: [{ name: "missing_rest" }],
-      }),
-    ).rejects.toThrow();
+  test("fills defaults for partial tool entries", async () => {
+    // Wire and author share one schema (`ToolDefinitionSchema`, all-optional)
+    // and the daemon runs `finalizeTool` on every incoming tool. So a
+    // partial entry doesn't reject — defaults fill in for missing fields.
+    const result = (await registerToolsRoute.handler({
+      skillId: "partial-skill",
+      tools: [{ name: "partial_tool" }],
+    })) as { registered: string[] };
+    expect(result.registered).toEqual(["partial_tool"]);
+    const installed = getTool("partial_tool");
+    expect(installed).toBeDefined();
+    expect(installed!.defaultRiskLevel).toBe(RiskLevel.Medium);
+    expect(installed!.executionTarget).toBe("sandbox");
   });
 
   test("rejects missing skillId", async () => {