diff --git a/apps/web/src/domains/chat/utils/sanitize-display-messages.test.ts b/apps/web/src/domains/chat/utils/sanitize-display-messages.test.ts index 3bdd80a1d8f..3c448f7bd36 100644 --- a/apps/web/src/domains/chat/utils/sanitize-display-messages.test.ts +++ b/apps/web/src/domains/chat/utils/sanitize-display-messages.test.ts @@ -375,12 +375,285 @@ describe("sanitizeDisplayMessages · drop trailing assistant duplicate", () => { }); // --------------------------------------------------------------------------- -// Integration — all three hacks compose +// Hack #4 — repair dangling tool calls on older assistant messages +// --------------------------------------------------------------------------- + +describe("sanitizeDisplayMessages · repair dangling tool calls", () => { + const SYNTHETIC = + "Tool call completed on the server, but the result never reached the client. Subsequent assistant activity confirms the tool returned — this is a client-side data loss, not a tool failure."; + + test("patches a running tool call on an older assistant when a later assistant exists", () => { + const older = makeMessage({ + stableId: "a-old", + role: "assistant", + timestamp: 100, + toolCalls: [ + makeToolCall({ id: "tc-1", toolName: "bash", status: "running" }), + ], + }); + const later = makeMessage({ + stableId: "a-new", + role: "assistant", + content: "follow-up", + timestamp: 200, + }); + const [patchedOld, untouchedNew] = sanitizeDisplayMessages([older, later]); + expect(patchedOld!.toolCalls![0]).toEqual({ + id: "tc-1", + toolName: "bash", + input: {}, + status: "error", + isError: true, + result: SYNTHETIC, + }); + // The later assistant is untouched even if it has its own tool calls. + expect(untouchedNew).toBe(later); + }); + + test("does NOT patch the last assistant — it could still be streaming", () => { + const userMsg = makeMessage({ + stableId: "u", + role: "user", + content: "go", + timestamp: 100, + }); + const last = makeMessage({ + stableId: "a-last", + role: "assistant", + timestamp: 200, + toolCalls: [ + makeToolCall({ id: "tc-1", toolName: "bash", status: "running" }), + ], + }); + const result = sanitizeDisplayMessages([userMsg, last]); + expect(result[1]).toBe(last); + expect(result[1]!.toolCalls![0]!.status).toBe("running"); + }); + + test("does NOT patch when only a subsequent USER message exists (no assistant proof)", () => { + const onlyAssistant = makeMessage({ + stableId: "a-only", + role: "assistant", + timestamp: 100, + toolCalls: [ + makeToolCall({ id: "tc-1", toolName: "bash", status: "running" }), + ], + }); + const trailingUser = makeMessage({ + stableId: "u", + role: "user", + content: "ping", + timestamp: 200, + }); + const result = sanitizeDisplayMessages([onlyAssistant, trailingUser]); + expect(result[0]).toBe(onlyAssistant); + expect(result[0]!.toolCalls![0]!.status).toBe("running"); + }); + + test("patches across an intervening user message", () => { + const a1 = makeMessage({ + stableId: "a1", + role: "assistant", + timestamp: 100, + toolCalls: [ + makeToolCall({ id: "tc-1", toolName: "bash", status: "running" }), + ], + }); + const u = makeMessage({ + stableId: "u", + role: "user", + content: "more", + timestamp: 200, + }); + const a2 = makeMessage({ + stableId: "a2", + role: "assistant", + content: "result", + timestamp: 300, + }); + const result = sanitizeDisplayMessages([a1, u, a2]); + expect(result[0]!.toolCalls![0]!.status).toBe("error"); + expect(result[0]!.toolCalls![0]!.result).toBe(SYNTHETIC); + }); + + test("leaves `status: 'completed'` tool calls alone (not dangling)", () => { + const older = makeMessage({ + stableId: "a-old", + role: "assistant", + timestamp: 100, + toolCalls: [ + makeToolCall({ + id: "tc-1", + toolName: "bash", + status: "completed", + result: "ok", + }), + ], + }); + const later = makeMessage({ + stableId: "a-new", + role: "assistant", + content: "follow-up", + timestamp: 200, + }); + const result = sanitizeDisplayMessages([older, later]); + // No patching happened → identity preserved (COW guarantee). + expect(result).toBe(result); // sanity + expect(result[0]).toBe(older); + expect(result[1]).toBe(later); + }); + + test("leaves `status: 'error'` tool calls alone (already terminal)", () => { + const older = makeMessage({ + stableId: "a-old", + role: "assistant", + timestamp: 100, + toolCalls: [ + makeToolCall({ + id: "tc-1", + toolName: "bash", + status: "error", + isError: true, + result: "boom", + }), + ], + }); + const later = makeMessage({ + stableId: "a-new", + role: "assistant", + content: "ok", + timestamp: 200, + }); + const result = sanitizeDisplayMessages([older, later]); + expect(result[0]).toBe(older); + expect(result[0]!.toolCalls![0]!.result).toBe("boom"); + }); + + test("patches only the running tool, leaves siblings on the same message alone", () => { + const older = makeMessage({ + stableId: "a-old", + role: "assistant", + timestamp: 100, + toolCalls: [ + makeToolCall({ + id: "tc-1", + toolName: "bash", + status: "completed", + result: "first ok", + }), + makeToolCall({ id: "tc-2", toolName: "web_search", status: "running" }), + makeToolCall({ + id: "tc-3", + toolName: "read_file", + status: "completed", + result: "third ok", + }), + ], + }); + const later = makeMessage({ + stableId: "a-new", + role: "assistant", + content: "done", + timestamp: 200, + }); + const result = sanitizeDisplayMessages([older, later]); + expect(result[0]!.toolCalls![0]!.result).toBe("first ok"); + expect(result[0]!.toolCalls![1]!.status).toBe("error"); + expect(result[0]!.toolCalls![1]!.isError).toBe(true); + expect(result[0]!.toolCalls![1]!.result).toBe(SYNTHETIC); + expect(result[0]!.toolCalls![2]!.result).toBe("third ok"); + }); + + test("patches multiple older assistants in a row", () => { + const a1 = makeMessage({ + stableId: "a1", + role: "assistant", + timestamp: 100, + toolCalls: [ + makeToolCall({ id: "tc-1", toolName: "bash", status: "running" }), + ], + }); + const a2 = makeMessage({ + stableId: "a2", + role: "assistant", + timestamp: 200, + toolCalls: [ + makeToolCall({ id: "tc-2", toolName: "bash", status: "running" }), + ], + }); + const a3 = makeMessage({ + stableId: "a3", + role: "assistant", + content: "done", + timestamp: 300, + }); + const result = sanitizeDisplayMessages([a1, a2, a3]); + expect(result[0]!.toolCalls![0]!.status).toBe("error"); + expect(result[1]!.toolCalls![0]!.status).toBe("error"); + expect(result[2]).toBe(a3); + }); + + test("does not mutate the input messages or tool-call objects", () => { + const tc = makeToolCall({ id: "tc", toolName: "bash", status: "running" }); + const older = makeMessage({ + stableId: "a-old", + role: "assistant", + timestamp: 100, + toolCalls: [tc], + }); + const later = makeMessage({ + stableId: "a-new", + role: "assistant", + content: "ok", + timestamp: 200, + }); + sanitizeDisplayMessages([older, later]); + expect(tc.status).toBe("running"); + expect(tc.result).toBeUndefined(); + expect(older.toolCalls![0]).toBe(tc); + }); + + test("preserves message identity when no tool calls are dangling", () => { + // The sort step always returns a new outer array, so the array-identity + // assertion lives at the *element* level: every message object must be + // the same reference. Confirms the repair step is COW at the message + // boundary when nothing needs patching. + const a1 = makeMessage({ + stableId: "a1", + role: "assistant", + timestamp: 100, + toolCalls: [ + makeToolCall({ + id: "tc-1", + toolName: "bash", + status: "completed", + result: "ok", + }), + ], + }); + const a2 = makeMessage({ + stableId: "a2", + role: "assistant", + content: "done", + timestamp: 200, + }); + const result = sanitizeDisplayMessages([a1, a2]); + expect(result[0]).toBe(a1); + expect(result[1]).toBe(a2); + }); + + test("empty array returns empty (no crashes from index math)", () => { + expect(sanitizeDisplayMessages([])).toEqual([]); + }); +}); + +// --------------------------------------------------------------------------- +// Integration — all four hacks compose // --------------------------------------------------------------------------- describe("sanitizeDisplayMessages · integration", () => { - test("sort → invalid filter → trailing-dup drop runs in order", () => { - // Construct a messy input that exercises all three hacks at once. + test("sort → invalid filter → trailing-dup drop → dangling-tool repair runs in order", () => { + // Construct a messy input that exercises all four hacks at once. const phantom = makeMessage({ stableId: "phantom", role: "user", @@ -396,6 +669,17 @@ describe("sanitizeDisplayMessages · integration", () => { content: "What's the answer?", timestamp: 100, }); + // An older assistant message with a running tool call — its `tool_result` + // event was lost in transit. We expect hack #4 to patch this. + const olderWithDangling = makeMessage({ + stableId: "older", + role: "assistant", + textSegments: [{ type: "text", content: "let me check" }], + toolCalls: [ + makeToolCall({ id: "tc-x", toolName: "bash", status: "running" }), + ], + timestamp: 150, + }); // The "real" assistant turn (server-assigned id). const server = makeMessage({ stableId: "server-abc", @@ -416,12 +700,28 @@ describe("sanitizeDisplayMessages · integration", () => { // first; `server` precedes `orphan` in the input because the sort is // stable on equal timestamps and the production duplicate-emission // order is "server row first, orphan row second". - const result = sanitizeDisplayMessages([phantom, server, orphan, userTurn]); + const result = sanitizeDisplayMessages([ + phantom, + server, + orphan, + olderWithDangling, + userTurn, + ]); // Expect: // - phantom dropped by hack #2, - // - rows sorted by timestamp (user → server → orphan after sort), - // - trailing orphan dropped by hack #3. - expect(result.map((m) => m.stableId)).toEqual(["user", "server-abc"]); + // - rows sorted by timestamp (user → olderWithDangling → server → orphan), + // - trailing orphan dropped by hack #3 (matches `server` on text + tool calls), + // - olderWithDangling's running tool call patched by hack #4 because + // `server` is a later assistant. + expect(result.map((m) => m.stableId)).toEqual([ + "user", + "older", + "server-abc", + ]); + const patchedTool = result[1]!.toolCalls![0]!; + expect(patchedTool.status).toBe("error"); + expect(patchedTool.isError).toBe(true); + expect(patchedTool.result).toContain("client-side data loss"); }); }); diff --git a/apps/web/src/domains/chat/utils/sanitize-display-messages.ts b/apps/web/src/domains/chat/utils/sanitize-display-messages.ts index 7d30a71ea93..073e09710b1 100644 --- a/apps/web/src/domains/chat/utils/sanitize-display-messages.ts +++ b/apps/web/src/domains/chat/utils/sanitize-display-messages.ts @@ -11,6 +11,7 @@ // ----------------------------------------------------------------------------- import { sortedByTimestamp } from "@/domains/chat/utils/message-sorting.js"; +import type { ChatMessageToolCall } from "@/domains/chat/api/event-types.js"; import type { DisplayMessage } from "@/domains/chat/types/types.js"; export function sanitizeDisplayMessages( @@ -20,6 +21,7 @@ export function sanitizeDisplayMessages( sortByTimestamp, removeInvalidMessages, removeDuplicateTrailingAssistant, + repairDanglingToolCalls, ]; return pipeline.reduce((msgs, step) => step(msgs), messages); } @@ -176,3 +178,96 @@ function toolCallsMatch(a: DisplayMessage, b: DisplayMessage): boolean { } return true; } + +// ----------------------------------------------------------------------------- +// Hack #4 — repair dangling tool calls on older assistant messages +// ----------------------------------------------------------------------------- +// Why it exists: occasionally a `tool_result` SSE event is lost between the +// daemon and the client (network drop, reconnect race, server-side fanout +// glitch). The tool call stays `status: "running"` forever in the client's +// `DisplayMessage[]`, even though the assistant clearly continued — there +// is a subsequent assistant message in the transcript, which is only +// possible if the LLM provider received the tool result on the server side. +// +// The render layer shows these stuck calls as a permanent spinner on an +// older message bubble, which is misleading: the tool DID complete, the +// client just never saw the result. +// +// Predicate (must ALL hold for a tool call to be patched): +// - its parent message is `role: "assistant"`, +// - the parent message is NOT the last assistant in the transcript (the +// last assistant may still be streaming; its dangling tools could +// legitimately resolve via an in-flight `tool_result`), +// - `tool_call.status === "running"` (the UI's canonical "no result yet" +// signal — see `tool-call-chip.tsx`'s `isRunning`). +// When all three hold, mutate the tool call to: +// - `status: "error"`, +// - `isError: true`, +// - `result: SYNTHETIC_DANGLING_RESULT` (explains the client-side data loss +// so a feedback report shows the root cause, not a vague tool failure). +// +// Pipeline placement: runs AFTER `removeDuplicateTrailingAssistant` so the +// dedup filter's pairwise `result` equality check sees the original (still +// undefined) values and can correctly identify the duplicate. If both +// duplicate trailing assistants carry the same dangling tools, dedup drops +// one and the remaining one becomes the last assistant — at which point this +// step conservatively skips it. +// +// SHORT TERM until: the assistant backend reliably delivers `tool_result` +// SSE events (or the reconcile pass closes the gap by treating dangling +// tools as authoritative client-side state to repair against /v1/history). +// ----------------------------------------------------------------------------- +const SYNTHETIC_DANGLING_RESULT = + "Tool call completed on the server, but the result never reached the client. Subsequent assistant activity confirms the tool returned — this is a client-side data loss, not a tool failure."; + +function repairDanglingToolCalls( + messages: DisplayMessage[], +): DisplayMessage[] { + const lastAssistantIdx = findLastAssistantIndex(messages); + // No subsequent-assistant evidence anywhere → nothing to repair against. + if (lastAssistantIdx <= 0) return messages; + + let result: DisplayMessage[] | null = null; + for (let i = 0; i < messages.length; i++) { + const m = messages[i]!; + const isPatchable = + m.role === "assistant" && + i < lastAssistantIdx && + hasDanglingToolCall(m); + if (!isPatchable) { + if (result) result.push(m); + continue; + } + if (!result) result = messages.slice(0, i); + result.push(withRepairedToolCalls(m)); + } + return result ?? messages; +} + +function findLastAssistantIndex(messages: DisplayMessage[]): number { + for (let i = messages.length - 1; i >= 0; i--) { + if (messages[i]!.role === "assistant") return i; + } + return -1; +} + +function hasDanglingToolCall(message: DisplayMessage): boolean { + return message.toolCalls?.some((tc) => tc.status === "running") ?? false; +} + +function withRepairedToolCalls(message: DisplayMessage): DisplayMessage { + return { + ...message, + toolCalls: message.toolCalls!.map(repairIfDangling), + }; +} + +function repairIfDangling(tc: ChatMessageToolCall): ChatMessageToolCall { + if (tc.status !== "running") return tc; + return { + ...tc, + status: "error", + isError: true, + result: SYNTHETIC_DANGLING_RESULT, + }; +}