vellum-ai · alex-nork · May 12, 2026 · May 12, 2026 · May 12, 2026 · May 12, 2026
diff --git a/assistant/src/__tests__/checker.test.ts b/assistant/src/__tests__/checker.test.ts
@@ -1597,6 +1597,99 @@ describe("bash network_mode=proxied — risk capped at medium", () => {
   });
 });
 
+// Permission decisions for bash commands that carry credential_ids.
+// mockRisk is called in every test (presumably stubbing the classifier's
+// response), so each case pins the decision check() returns for that stub.
+describe("credentialed proxied bash — high risk escalation", () => {
+  // Reason reported by the stubbed high-risk response in the prompt tests.
+  const CREDENTIAL_SESSION_REASON =
+    "Proxied credential session — shell has access to injected credentials";
+
+  // Reset shared state: low-risk stub, default thresholds, cleared caches.
+  beforeEach(() => {
+    mockRisk("low");
+    mockIpcResponse("get_global_thresholds", DEFAULT_GATEWAY_THRESHOLDS);
+    _clearGlobalCacheForTesting();
+    clearRiskCache();
+    testConfig.skills = { load: { extraDirs: [] } };
+  });
+
+  // NOTE(review): only the decision and reason are asserted; nothing here
+  // inspects the IPC request params, so credential ref count forwarding is not covered.
+  test("proxied bash with credential_ids prompts and reports the credential reason", async () => {
+    mockRisk("high", { reason: CREDENTIAL_SESSION_REASON });
+    const result = await check(
+      "bash",
+      {
+        command: "curl https://api.example.com",
+        network_mode: "proxied",
+        credential_ids: ["cred-abc-123"],
+      },
+      "/tmp",
+    );
+    expect(result.decision).toBe("prompt");
+    expect(result.reason).toContain("credential");
+  });
+
+  test("proxied bash with multiple credential_ids prompts with high risk", async () => {
+    mockRisk("high", { reason: CREDENTIAL_SESSION_REASON });
+    const result = await check(
+      "bash",
+      {
+        command: "ls",
+        network_mode: "proxied",
+        credential_ids: ["cred-1", "cred-2"],
+      },
+      "/tmp",
+    );
+    expect(result.decision).toBe("prompt");
+  });
+
+  test("proxied bash with empty credential_ids array does not escalate risk", async () => {
+    mockRisk("low");
+    const result = await check(
+      "bash",
+      {
+        command: "ls",
+        network_mode: "proxied",
+        credential_ids: [],
+      },
+      "/tmp",
+    );
+    // Empty array means no credential refs — follows normal proxied behavior
+    expect(result.decision).toBe("allow");
+  });
+
+  test("proxied bash with credential_ids containing empty strings does not escalate", async () => {
+    mockRisk("low");
+    const result = await check(
+      "bash",
+      {
+        command: "ls",
+        network_mode: "proxied",
+        credential_ids: ["", ""],
+      },
+      "/tmp",
+    );
+    // Empty strings are expected to be filtered out, leaving no credential refs
+    expect(result.decision).toBe("allow");
+  });
+
+  test("non-proxied bash with credential_ids follows normal flow", async () => {
+    mockRisk("low");
+    const result = await check(
+      "bash",
+      {
+        command: "ls",
+        credential_ids: ["cred-abc-123"],
+      },
+      "/tmp",
+    );
+    // Without proxied mode, credential refs don't affect IPC classification
+    expect(result.decision).toBe("allow");
+  });
+});
+
 describe("workspace mode — auto-allow workspace-scoped operations", () => {
   const workspaceDir = "/home/user/my-project";
 

diff --git a/assistant/src/__tests__/shell-credential-ref.test.ts b/assistant/src/__tests__/shell-credential-ref.test.ts
@@ -93,6 +93,7 @@ afterAll(() => {
 describe("shell tool credential ref resolution", () => {
   test("service/field ref resolves to UUID and reaches session creation", async () => {
     const meta = upsertCredentialMetadata("fal", "api_key", {
+      allowedTools: ["bash"],
       injectionTemplates: [
         {
           hostPattern: "*.fal.ai",
@@ -120,7 +121,9 @@ describe("shell tool credential ref resolution", () => {
   });
 
   test("UUID ref remains supported", async () => {
-    const meta = upsertCredentialMetadata("github", "token");
+    const meta = upsertCredentialMetadata("github", "token", {
+      allowedTools: ["bash"],
+    });
 
     await shellTool.execute(
       {
@@ -156,7 +159,9 @@ describe("shell tool credential ref resolution", () => {
   });
 
   test("mixed known+unknown refs fails fast (no partial execution)", async () => {
-    upsertCredentialMetadata("fal", "api_key");
+    upsertCredentialMetadata("fal", "api_key", {
+      allowedTools: ["bash"],
+    });
 
     const result = await shellTool.execute(
       {
@@ -175,7 +180,9 @@ describe("shell tool credential ref resolution", () => {
   });
 
   test("duplicate refs are deduped", async () => {
-    const meta = upsertCredentialMetadata("fal", "api_key");
+    const meta = upsertCredentialMetadata("fal", "api_key", {
+      allowedTools: ["bash"],
+    });
 
     await shellTool.execute(
       {
@@ -209,4 +216,99 @@ describe("shell tool credential ref resolution", () => {
     expect(result.isError).toBeFalsy();
     expect(mockGetOrStartSession).not.toHaveBeenCalled();
   });
+
+  // A credential whose allowedTools omits "bash" is expected to be rejected,
+  // and the rejection must happen before any session is requested.
+  test("credential with allowedTools excluding bash is denied for proxied shell", async () => {
+    upsertCredentialMetadata("vercel", "api_token", {
+      allowedTools: ["publish_page"],
+      injectionTemplates: [
+        {
+          hostPattern: "api.vercel.com",
+          injectionType: "header",
+          headerName: "Authorization",
+          valuePrefix: "Bearer ",
+        },
+      ],
+    });
+
+    const { isError, content } = await shellTool.execute(
+      {
+        command: "curl https://api.vercel.com/v1/projects",
+        activity: "test",
+        network_mode: "proxied",
+        credential_ids: ["vercel/api_token"],
+      },
+      ctx,
+    );
+
+    expect(isError).toBe(true);
+    for (const fragment of ["credential tool policy denied", "not bash"]) {
+      expect(content).toContain(fragment);
+    }
+    // Policy denial comes first, so getOrStartSession must never be reached.
+    expect(mockGetOrStartSession).not.toHaveBeenCalled();
+  });
+
+  // A credential that lists "bash" in allowedTools is expected to pass: exactly
+  // one session is requested, carrying the credential's resolved ID.
+  test("credential with allowedTools including bash starts proxied session", async () => {
+    const { credentialId } = upsertCredentialMetadata("deploy_svc", "api_key", {
+      allowedTools: ["bash"],
+      injectionTemplates: [
+        {
+          hostPattern: "*.deploy-svc.io",
+          injectionType: "header",
+          headerName: "Authorization",
+          valuePrefix: "Bearer ",
+        },
+      ],
+    });
+
+    await shellTool.execute(
+      {
+        command: "echo deploy",
+        activity: "test",
+        network_mode: "proxied",
+        credential_ids: ["deploy_svc/api_key"],
+      },
+      ctx,
+    );
+
+    expect(mockGetOrStartSession).toHaveBeenCalledTimes(1);
+    // The second argument of the session call holds the resolved credential IDs.
+    const sessionCredentialIds = mockGetOrStartSession.mock.calls[0][1];
+    expect(sessionCredentialIds).toEqual([credentialId]);
+  });
+
+  // One disallowed credential is expected to fail the whole command, even when
+  // another referenced credential permits bash.
+  test("mixed allowed and denied credentials fail the whole command before session creation", async () => {
+    upsertCredentialMetadata("allowed_svc", "token", { allowedTools: ["bash"] });
+    upsertCredentialMetadata("denied_svc", "token", {
+      allowedTools: ["publish_page"],
+    });
+
+    const { isError, content } = await shellTool.execute(
+      {
+        command: "echo mixed",
+        activity: "test",
+        network_mode: "proxied",
+        credential_ids: ["allowed_svc/token", "denied_svc/token"],
+      },
+      ctx,
+    );
+
+    expect(isError).toBe(true);
+    const expectedFragments = [
+      "credential tool policy denied",
+      "denied_svc/token",
+      "not bash",
+    ];
+    for (const fragment of expectedFragments) {
+      expect(content).toContain(fragment);
+    }
+    // A single denied credential blocks the command, so no session is requested.
+    expect(mockGetOrStartSession).not.toHaveBeenCalled();
+  });
 });
diff --git a/assistant/src/__tests__/shell-tool-proxy-mode.test.ts b/assistant/src/__tests__/shell-tool-proxy-mode.test.ts
@@ -136,6 +136,32 @@ mock.module("../tools/credentials/resolve.js", () => ({
   resolveCredentialRef: (ref: string) => ({ credentialId: ref }),
 }));
 
+// Stub the metadata store: every credential ID yields a record that permits
+// the "bash" tool and uses the ID itself as the field name.
+mock.module("../tools/credentials/metadata-store.js", () => {
+  const getCredentialMetadataById = (id: string) => {
+    return {
+      service: "test",
+      field: id,
+      allowedTools: ["bash"],
+      allowedDomains: [],
+    };
+  };
+  return { getCredentialMetadataById };
+});
+
+// Stub the tool policy check: a tool is allowed only when `allowedTools` is an
+// array that contains the tool's name.
+mock.module("../tools/credentials/tool-policy.js", () => {
+  function isToolAllowed(toolName: string, allowedTools: string[]) {
+    if (!Array.isArray(allowedTools)) {
+      return false;
+    }
+    return allowedTools.includes(toolName);
+  }
+  return { isToolAllowed };
+});
+
 mock.module("../tools/network/script-proxy/logging.js", () => ({
   buildCredentialRefTrace: (
     rawRefs: string[],