Escalate credentialed proxied bash to high risk.

The assistant counts the non-empty string entries of `credential_ids` on
bash/host_bash invocations and sends that count as `credentialRefCount` in the
classify-risk params. The gateway handler reports "high" (with a
credential-specific reason) for sandboxed `bash` when networkMode is "proxied"
and credentialRefCount > 0. Proxied bash without credential refs keeps the
existing high -> medium cap, and host_bash is not touched by either rule.

NOTE(review): line structure and indentation were reconstructed from a
whitespace-collapsed copy of this patch; run `git apply --check` before use.

diff --git a/assistant/src/__tests__/checker.test.ts b/assistant/src/__tests__/checker.test.ts
index d5968cebffd..e440d2c7207 100644
--- a/assistant/src/__tests__/checker.test.ts
+++ b/assistant/src/__tests__/checker.test.ts
@@ -1597,6 +1597,95 @@ describe("bash network_mode=proxied — risk capped at medium", () => {
   });
 });
 
+describe("credentialed proxied bash — high risk escalation", () => {
+  beforeEach(() => {
+    mockRisk("low");
+    mockIpcResponse("get_global_thresholds", DEFAULT_GATEWAY_THRESHOLDS);
+    _clearGlobalCacheForTesting();
+    clearRiskCache();
+    testConfig.skills = { load: { extraDirs: [] } };
+  });
+
+  test("proxied bash with credential_ids prompts and surfaces the credential reason", async () => {
+    mockRisk("high", {
+      reason:
+        "Proxied credential session — shell has access to injected credentials",
+    });
+    const result = await check(
+      "bash",
+      {
+        command: "curl https://api.example.com",
+        network_mode: "proxied",
+        credential_ids: ["cred-abc-123"],
+      },
+      "/tmp",
+    );
+    expect(result.decision).toBe("prompt");
+    expect(result.reason).toContain("credential");
+  });
+
+  test("proxied bash with multiple credential_ids prompts with high risk", async () => {
+    mockRisk("high", {
+      reason:
+        "Proxied credential session — shell has access to injected credentials",
+    });
+    const result = await check(
+      "bash",
+      {
+        command: "ls",
+        network_mode: "proxied",
+        credential_ids: ["cred-1", "cred-2"],
+      },
+      "/tmp",
+    );
+    expect(result.decision).toBe("prompt");
+  });
+
+  test("proxied bash with empty credential_ids array does not escalate risk", async () => {
+    mockRisk("low");
+    const result = await check(
+      "bash",
+      {
+        command: "ls",
+        network_mode: "proxied",
+        credential_ids: [],
+      },
+      "/tmp",
+    );
+    // Empty array means no credential refs — follows normal proxied behavior
+    expect(result.decision).toBe("allow");
+  });
+
+  test("proxied bash with credential_ids containing empty strings does not escalate", async () => {
+    mockRisk("low");
+    const result = await check(
+      "bash",
+      {
+        command: "ls",
+        network_mode: "proxied",
+        credential_ids: ["", ""],
+      },
+      "/tmp",
+    );
+    // Empty strings are filtered out, so no credential refs
+    expect(result.decision).toBe("allow");
+  });
+
+  test("non-proxied bash with credential_ids follows normal flow", async () => {
+    mockRisk("low");
+    const result = await check(
+      "bash",
+      {
+        command: "ls",
+        credential_ids: ["cred-abc-123"],
+      },
+      "/tmp",
+    );
+    // Without proxied mode, credential refs don't affect IPC classification
+    expect(result.decision).toBe("allow");
+  });
+});
+
 describe("workspace mode — auto-allow workspace-scoped operations", () => {
   const workspaceDir = "/home/user/my-project";
 
diff --git a/assistant/src/permissions/checker.ts b/assistant/src/permissions/checker.ts
index 5e29b7d85dd..d900198bd16 100644
--- a/assistant/src/permissions/checker.ts
+++ b/assistant/src/permissions/checker.ts
@@ -289,6 +289,17 @@ function buildClassifyRiskParams(
 ): ClassifyRiskParams {
   // ── Bash/host_bash ──
   if (toolName === "bash" || toolName === "host_bash") {
+    // Count credential references attached to this invocation.
+    let credentialRefCount: number | undefined;
+    if (Array.isArray(input.credential_ids)) {
+      const validIds = (input.credential_ids as unknown[]).filter(
+        (id) => typeof id === "string" && id.length > 0,
+      );
+      if (validIds.length > 0) {
+        credentialRefCount = validIds.length;
+      }
+    }
+
     return {
       tool: toolName,
       command: getStringField(input, "command"),
@@ -297,6 +308,7 @@ function buildClassifyRiskParams(
       isContainerized: getIsContainerized(),
       networkMode:
         typeof input.network_mode === "string" ? input.network_mode : undefined,
+      credentialRefCount,
     };
   }
 
diff --git a/assistant/src/permissions/ipc-risk-types.ts b/assistant/src/permissions/ipc-risk-types.ts
index c12267e8f88..24c98f31415 100644
--- a/assistant/src/permissions/ipc-risk-types.ts
+++ b/assistant/src/permissions/ipc-risk-types.ts
@@ -92,4 +92,6 @@ export interface ClassifyRiskParams {
   skillMetadata?: SkillMetadata;
   /** Tool registry default risk level for unknown tools. */
   registryDefaultRisk?: string;
+  /** Number of credential references attached to this tool invocation. */
+  credentialRefCount?: number;
 }
diff --git a/gateway/src/ipc/risk-classification-handlers.test.ts b/gateway/src/ipc/risk-classification-handlers.test.ts
index d4b1056a49a..a9a089558ad 100644
--- a/gateway/src/ipc/risk-classification-handlers.test.ts
+++ b/gateway/src/ipc/risk-classification-handlers.test.ts
@@ -349,6 +349,82 @@ describe("skill classification", () => {
   });
 });
 
+// ── Credentialed proxied bash ───────────────────────────────────────────────
+
+describe("credentialed proxied bash", () => {
+  test("credentialed proxied bash returns high risk even for simple curl", async () => {
+    const result = await classify({
+      tool: "bash",
+      command: "curl https://api.example.com",
+      networkMode: "proxied",
+      credentialRefCount: 1,
+    });
+    expect(result.risk).toBe("high");
+    expect(result.reason).toContain("credential");
+  });
+
+  test("credentialed proxied bash returns high risk for low-risk command", async () => {
+    const result = await classify({
+      tool: "bash",
+      command: "ls",
+      networkMode: "proxied",
+      credentialRefCount: 2,
+    });
+    expect(result.risk).toBe("high");
+    expect(result.reason).toContain("credential");
+  });
+
+  test("proxied bash without credential refs keeps existing medium cap for high-risk command", async () => {
+    const result = await classify({
+      tool: "bash",
+      command: "rm -rf /",
+      networkMode: "proxied",
+    });
+    // rm -rf / is high risk but gets capped to medium for non-credentialed proxied bash
+    expect(result.risk).toBe("medium");
+  });
+
+  test("proxied bash without credential refs keeps low risk for low-risk command", async () => {
+    const result = await classify({
+      tool: "bash",
+      command: "ls",
+      networkMode: "proxied",
+    });
+    expect(result.risk).toBe("low");
+  });
+
+  test("credentialRefCount=0 does not escalate risk", async () => {
+    const result = await classify({
+      tool: "bash",
+      command: "ls",
+      networkMode: "proxied",
+      credentialRefCount: 0,
+    });
+    expect(result.risk).toBe("low");
+  });
+
+  test("non-proxied bash with credential refs follows normal risk flow", async () => {
+    const result = await classify({
+      tool: "bash",
+      command: "ls",
+      credentialRefCount: 1,
+    });
+    // Without proxied mode, credential refs don't affect classification
+    expect(result.risk).toBe("low");
+  });
+
+  test("host_bash with proxied + credential refs is not affected (host_bash skips proxied cap)", async () => {
+    const result = await classify({
+      tool: "host_bash",
+      command: "rm -rf /",
+      networkMode: "proxied",
+      credentialRefCount: 1,
+    });
+    // host_bash is never affected by proxied risk logic
+    expect(result.risk).toBe("high");
+  });
+});
+
 // ── Unknown tool fallback ───────────────────────────────────────────────────
 
 describe("unknown tool fallback", () => {
diff --git a/gateway/src/ipc/risk-classification-handlers.ts b/gateway/src/ipc/risk-classification-handlers.ts
index 585b529b97c..914cd8b490b 100644
--- a/gateway/src/ipc/risk-classification-handlers.ts
+++ b/gateway/src/ipc/risk-classification-handlers.ts
@@ -73,6 +73,8 @@ const ClassifyRiskSchema = z.object({
     .optional(),
   /** Tool registry default risk level for unknown tools. */
   registryDefaultRisk: z.string().optional(),
+  /** Number of credential references attached to this tool invocation. */
+  credentialRefCount: z.number().int().nonnegative().optional(),
 });
 
 type ClassifyRiskParams = z.infer<typeof ClassifyRiskSchema>;
@@ -358,17 +360,26 @@ export async function handleClassifyRisk(
     });
   }
 
-  // Proxied bash risk cap: when running through the credential proxy,
-  // cap High → Medium so proxied commands don't trigger unnecessary prompts.
+  // Proxied bash risk classification:
+  // - When credentials are attached (credentialRefCount > 0), escalate to
+  //   high risk regardless of the underlying assessment. Credentialed
+  //   proxied shell sessions carry elevated risk and must not be
+  //   downgraded by the general proxied-bash cap.
+  // - For non-credentialed proxied bash, cap High → Medium so proxied
+  //   commands don't trigger unnecessary prompts.
   // Only applies to sandboxed "bash" — host_bash runs on the host machine
   // and should not have its risk capped.
   let finalRisk = assessment.riskLevel;
-  if (
-    tool === "bash" &&
-    params.networkMode === "proxied" &&
-    finalRisk === "high"
-  ) {
-    finalRisk = "medium";
+  let finalReason = assessment.reason;
+  const credentialRefCount = params.credentialRefCount ?? 0;
+  if (tool === "bash" && params.networkMode === "proxied") {
+    if (credentialRefCount > 0) {
+      finalRisk = "high";
+      finalReason =
+        "Proxied credential session — shell has access to injected credentials";
+    } else if (finalRisk === "high") {
+      finalRisk = "medium";
+    }
   }
 
   // Collect resolved paths for directory-scoped rule enforcement.
@@ -380,7 +391,7 @@ export async function handleClassifyRisk(
 
   return {
     risk: finalRisk,
-    reason: assessment.reason,
+    reason: finalReason,
     scopeOptions: assessment.scopeOptions,
     allowlistOptions: assessment.allowlistOptions,
     actionKeys,