Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions assistant/src/__tests__/checker.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1597,6 +1597,95 @@ describe("bash network_mode=proxied — risk capped at medium", () => {
});
});

describe("credentialed proxied bash — high risk escalation", () => {
  // Reason text handed to mockRisk for the credentialed-session cases.
  const credentialedReason =
    "Proxied credential session — shell has access to injected credentials";

  // Every case invokes the "bash" tool from /tmp; only the tool input varies.
  const checkBash = (input: Parameters<typeof check>[1]) =>
    check("bash", input, "/tmp");

  // Reset to a low-risk baseline and clear caches so cases stay independent.
  beforeEach(() => {
    mockRisk("low");
    mockIpcResponse("get_global_thresholds", DEFAULT_GATEWAY_THRESHOLDS);
    _clearGlobalCacheForTesting();
    clearRiskCache();
    testConfig.skills = { load: { extraDirs: [] } };
  });

  // NOTE(review): the title mentions IPC params, but this case only asserts
  // on the resulting decision and reason — nothing here inspects the request
  // that was sent. Confirm whether an assertion on credentialRefCount is
  // intended.
  test("proxied bash with credential_ids sends credentialRefCount in IPC params", async () => {
    mockRisk("high", { reason: credentialedReason });

    const outcome = await checkBash({
      command: "curl https://api.example.com",
      network_mode: "proxied",
      credential_ids: ["cred-abc-123"],
    });

    expect(outcome.decision).toBe("prompt");
    expect(outcome.reason).toContain("credential");
  });

  test("proxied bash with multiple credential_ids prompts with high risk", async () => {
    mockRisk("high", { reason: credentialedReason });

    const outcome = await checkBash({
      command: "ls",
      network_mode: "proxied",
      credential_ids: ["cred-1", "cred-2"],
    });

    expect(outcome.decision).toBe("prompt");
  });

  // An empty array carries no credential refs, so the normal proxied
  // behavior is expected.
  test("proxied bash with empty credential_ids array does not escalate risk", async () => {
    mockRisk("low");

    const outcome = await checkBash({
      command: "ls",
      network_mode: "proxied",
      credential_ids: [],
    });

    expect(outcome.decision).toBe("allow");
  });

  // Empty strings are not counted as credential refs.
  test("proxied bash with credential_ids containing empty strings does not escalate", async () => {
    mockRisk("low");

    const outcome = await checkBash({
      command: "ls",
      network_mode: "proxied",
      credential_ids: ["", ""],
    });

    expect(outcome.decision).toBe("allow");
  });

  // No network_mode here: credential refs alone are expected to leave the
  // decision unchanged.
  test("non-proxied bash with credential_ids follows normal flow", async () => {
    mockRisk("low");

    const outcome = await checkBash({
      command: "ls",
      credential_ids: ["cred-abc-123"],
    });

    expect(outcome.decision).toBe("allow");
  });
});

describe("workspace mode — auto-allow workspace-scoped operations", () => {
const workspaceDir = "/home/user/my-project";

Expand Down
12 changes: 12 additions & 0 deletions assistant/src/permissions/checker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,17 @@ function buildClassifyRiskParams(
): ClassifyRiskParams {
// ── Bash/host_bash ──
if (toolName === "bash" || toolName === "host_bash") {
// Count credential references attached to this invocation.
let credentialRefCount: number | undefined;
if (Array.isArray(input.credential_ids)) {
const validIds = (input.credential_ids as unknown[]).filter(
(id) => typeof id === "string" && id.length > 0,
);
if (validIds.length > 0) {
credentialRefCount = validIds.length;
}
}

return {
tool: toolName,
command: getStringField(input, "command"),
Expand All @@ -297,6 +308,7 @@ function buildClassifyRiskParams(
isContainerized: getIsContainerized(),
networkMode:
typeof input.network_mode === "string" ? input.network_mode : undefined,
credentialRefCount,
};
}

Expand Down
2 changes: 2 additions & 0 deletions assistant/src/permissions/ipc-risk-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,6 @@ export interface ClassifyRiskParams {
skillMetadata?: SkillMetadata;
/** Tool registry default risk level for unknown tools. */
registryDefaultRisk?: string;
/** Number of credential references attached to this tool invocation. */
credentialRefCount?: number;
}
76 changes: 76 additions & 0 deletions gateway/src/ipc/risk-classification-handlers.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,82 @@ describe("skill classification", () => {
});
});

// ── Credentialed proxied bash ───────────────────────────────────────────────

describe("credentialed proxied bash", () => {
  /** One classification scenario: request params plus the expected outcome. */
  type Scenario = {
    title: string;
    params: Parameters<typeof classify>[0];
    risk: "low" | "medium" | "high";
    /** When set, the returned reason must contain this substring. */
    reasonIncludes?: string;
  };

  const scenarios: Scenario[] = [
    {
      title:
        "credentialed proxied bash returns high risk even for simple curl",
      params: {
        tool: "bash",
        command: "curl https://api.example.com",
        networkMode: "proxied",
        credentialRefCount: 1,
      },
      risk: "high",
      reasonIncludes: "credential",
    },
    {
      title:
        "credentialed proxied bash returns high risk for low-risk command",
      params: {
        tool: "bash",
        command: "ls",
        networkMode: "proxied",
        credentialRefCount: 2,
      },
      risk: "high",
      reasonIncludes: "credential",
    },
    {
      // rm -rf / is expected to be capped to medium when proxied bash has
      // no credential refs.
      title:
        "proxied bash without credential refs keeps existing medium cap for high-risk command",
      params: {
        tool: "bash",
        command: "rm -rf /",
        networkMode: "proxied",
      },
      risk: "medium",
    },
    {
      title:
        "proxied bash without credential refs keeps low risk for low-risk command",
      params: {
        tool: "bash",
        command: "ls",
        networkMode: "proxied",
      },
      risk: "low",
    },
    {
      title: "credentialRefCount=0 does not escalate risk",
      params: {
        tool: "bash",
        command: "ls",
        networkMode: "proxied",
        credentialRefCount: 0,
      },
      risk: "low",
    },
    {
      // No networkMode: credential refs alone should not change the result.
      title: "non-proxied bash with credential refs follows normal risk flow",
      params: {
        tool: "bash",
        command: "ls",
        credentialRefCount: 1,
      },
      risk: "low",
    },
    {
      // host_bash is expected to bypass the proxied risk logic entirely.
      title:
        "host_bash with proxied + credential refs is not affected (host_bash skips proxied cap)",
      params: {
        tool: "host_bash",
        command: "rm -rf /",
        networkMode: "proxied",
        credentialRefCount: 1,
      },
      risk: "high",
    },
  ];

  // Register one test per scenario, in table order.
  for (const { title, params, risk, reasonIncludes } of scenarios) {
    test(title, async () => {
      const outcome = await classify(params);
      expect(outcome.risk).toBe(risk);
      if (reasonIncludes !== undefined) {
        expect(outcome.reason).toContain(reasonIncludes);
      }
    });
  }
});

// ── Unknown tool fallback ───────────────────────────────────────────────────

describe("unknown tool fallback", () => {
Expand Down
29 changes: 20 additions & 9 deletions gateway/src/ipc/risk-classification-handlers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ const ClassifyRiskSchema = z.object({
.optional(),
/** Tool registry default risk level for unknown tools. */
registryDefaultRisk: z.string().optional(),
/** Number of credential references attached to this tool invocation. */
credentialRefCount: z.number().int().nonnegative().optional(),
});

type ClassifyRiskParams = z.infer<typeof ClassifyRiskSchema>;
Expand Down Expand Up @@ -358,17 +360,26 @@ export async function handleClassifyRisk(
});
}

// Proxied bash risk cap: when running through the credential proxy,
// cap High → Medium so proxied commands don't trigger unnecessary prompts.
// Proxied bash risk classification:
// - When credentials are attached (credentialRefCount > 0), escalate to
// high risk regardless of the underlying assessment. Credentialed
// proxied shell sessions carry elevated risk and must not be
// downgraded by the general proxied-bash cap.
// - For non-credentialed proxied bash, cap High → Medium so proxied
// commands don't trigger unnecessary prompts.
// Only applies to sandboxed "bash" — host_bash runs on the host machine
// and should not have its risk capped.
let finalRisk = assessment.riskLevel;
if (
tool === "bash" &&
params.networkMode === "proxied" &&
finalRisk === "high"
) {
finalRisk = "medium";
let finalReason = assessment.reason;
const credentialRefCount = params.credentialRefCount ?? 0;
if (tool === "bash" && params.networkMode === "proxied") {
if (credentialRefCount > 0) {
finalRisk = "high";
finalReason =
"Proxied credential session — shell has access to injected credentials";
} else if (finalRisk === "high") {
finalRisk = "medium";
}
}

// Collect resolved paths for directory-scoped rule enforcement.
Expand All @@ -380,7 +391,7 @@ export async function handleClassifyRisk(

return {
risk: finalRisk,
reason: assessment.reason,
reason: finalReason,
scopeOptions: assessment.scopeOptions,
allowlistOptions: assessment.allowlistOptions,
actionKeys,
Expand Down
Loading