vellum-ai · dvargasfuertes · May 25, 2026 · May 25, 2026
diff --git a/apps/web/src/domains/chat/utils/sanitize-display-messages.test.ts b/apps/web/src/domains/chat/utils/sanitize-display-messages.test.ts
@@ -375,12 +375,285 @@ describe("sanitizeDisplayMessages · drop trailing assistant duplicate", () => {
 });
 
 // ---------------------------------------------------------------------------
-// Integration — all three hacks compose
+// Hack #4 — repair dangling tool calls on older assistant messages
+// ---------------------------------------------------------------------------
+
+describe("sanitizeDisplayMessages · repair dangling tool calls", () => {
+  const SYNTHETIC =
+    "Tool call completed on the server, but the result never reached the client. Subsequent assistant activity confirms the tool returned — this is a client-side data loss, not a tool failure.";
+
+  test("patches a running tool call on an older assistant when a later assistant exists", () => {
+    const older = makeMessage({
+      stableId: "a-old",
+      role: "assistant",
+      timestamp: 100,
+      toolCalls: [
+        makeToolCall({ id: "tc-1", toolName: "bash", status: "running" }),
+      ],
+    });
+    const later = makeMessage({
+      stableId: "a-new",
+      role: "assistant",
+      content: "follow-up",
+      timestamp: 200,
+    });
+    const [patchedOld, untouchedNew] = sanitizeDisplayMessages([older, later]);
+    expect(patchedOld!.toolCalls![0]).toEqual({
+      id: "tc-1",
+      toolName: "bash",
+      input: {},
+      status: "error",
+      isError: true,
+      result: SYNTHETIC,
+    });
+    // The later assistant is untouched even if it has its own tool calls.
+    expect(untouchedNew).toBe(later);
+  });
+
+  test("does NOT patch the last assistant — it could still be streaming", () => {
+    const userMsg = makeMessage({
+      stableId: "u",
+      role: "user",
+      content: "go",
+      timestamp: 100,
+    });
+    const last = makeMessage({
+      stableId: "a-last",
+      role: "assistant",
+      timestamp: 200,
+      toolCalls: [
+        makeToolCall({ id: "tc-1", toolName: "bash", status: "running" }),
+      ],
+    });
+    const result = sanitizeDisplayMessages([userMsg, last]);
+    expect(result[1]).toBe(last);
+    expect(result[1]!.toolCalls![0]!.status).toBe("running");
+  });
+
+  test("does NOT patch when only a subsequent USER message exists (no assistant proof)", () => {
+    const onlyAssistant = makeMessage({
+      stableId: "a-only",
+      role: "assistant",
+      timestamp: 100,
+      toolCalls: [
+        makeToolCall({ id: "tc-1", toolName: "bash", status: "running" }),
+      ],
+    });
+    const trailingUser = makeMessage({
+      stableId: "u",
+      role: "user",
+      content: "ping",
+      timestamp: 200,
+    });
+    const result = sanitizeDisplayMessages([onlyAssistant, trailingUser]);
+    expect(result[0]).toBe(onlyAssistant);
+    expect(result[0]!.toolCalls![0]!.status).toBe("running");
+  });
+
+  test("patches across an intervening user message", () => {
+    const a1 = makeMessage({
+      stableId: "a1",
+      role: "assistant",
+      timestamp: 100,
+      toolCalls: [
+        makeToolCall({ id: "tc-1", toolName: "bash", status: "running" }),
+      ],
+    });
+    const u = makeMessage({
+      stableId: "u",
+      role: "user",
+      content: "more",
+      timestamp: 200,
+    });
+    const a2 = makeMessage({
+      stableId: "a2",
+      role: "assistant",
+      content: "result",
+      timestamp: 300,
+    });
+    const result = sanitizeDisplayMessages([a1, u, a2]);
+    expect(result[0]!.toolCalls![0]!.status).toBe("error");
+    expect(result[0]!.toolCalls![0]!.result).toBe(SYNTHETIC);
+  });
+
+  test("leaves `status: 'completed'` tool calls alone (not dangling)", () => {
+    const older = makeMessage({
+      stableId: "a-old",
+      role: "assistant",
+      timestamp: 100,
+      toolCalls: [
+        makeToolCall({
+          id: "tc-1",
+          toolName: "bash",
+          status: "completed",
+          result: "ok",
+        }),
+      ],
+    });
+    const later = makeMessage({
+      stableId: "a-new",
+      role: "assistant",
+      content: "follow-up",
+      timestamp: 200,
+    });
+    const result = sanitizeDisplayMessages([older, later]);
+    // No patching happened → identity preserved (COW guarantee).
+    expect(result).toBe(result); // sanity
+    expect(result[0]).toBe(older);
+    expect(result[1]).toBe(later);
+  });
+
+  test("leaves `status: 'error'` tool calls alone (already terminal)", () => {
+    const older = makeMessage({
+      stableId: "a-old",
+      role: "assistant",
+      timestamp: 100,
+      toolCalls: [
+        makeToolCall({
+          id: "tc-1",
+          toolName: "bash",
+          status: "error",
+          isError: true,
+          result: "boom",
+        }),
+      ],
+    });
+    const later = makeMessage({
+      stableId: "a-new",
+      role: "assistant",
+      content: "ok",
+      timestamp: 200,
+    });
+    const result = sanitizeDisplayMessages([older, later]);
+    expect(result[0]).toBe(older);
+    expect(result[0]!.toolCalls![0]!.result).toBe("boom");
+  });
+
+  test("patches only the running tool, leaves siblings on the same message alone", () => {
+    const older = makeMessage({
+      stableId: "a-old",
+      role: "assistant",
+      timestamp: 100,
+      toolCalls: [
+        makeToolCall({
+          id: "tc-1",
+          toolName: "bash",
+          status: "completed",
+          result: "first ok",
+        }),
+        makeToolCall({ id: "tc-2", toolName: "web_search", status: "running" }),
+        makeToolCall({
+          id: "tc-3",
+          toolName: "read_file",
+          status: "completed",
+          result: "third ok",
+        }),
+      ],
+    });
+    const later = makeMessage({
+      stableId: "a-new",
+      role: "assistant",
+      content: "done",
+      timestamp: 200,
+    });
+    const result = sanitizeDisplayMessages([older, later]);
+    expect(result[0]!.toolCalls![0]!.result).toBe("first ok");
+    expect(result[0]!.toolCalls![1]!.status).toBe("error");
+    expect(result[0]!.toolCalls![1]!.isError).toBe(true);
+    expect(result[0]!.toolCalls![1]!.result).toBe(SYNTHETIC);
+    expect(result[0]!.toolCalls![2]!.result).toBe("third ok");
+  });
+
+  test("patches multiple older assistants in a row", () => {
+    const a1 = makeMessage({
+      stableId: "a1",
+      role: "assistant",
+      timestamp: 100,
+      toolCalls: [
+        makeToolCall({ id: "tc-1", toolName: "bash", status: "running" }),
+      ],
+    });
+    const a2 = makeMessage({
+      stableId: "a2",
+      role: "assistant",
+      timestamp: 200,
+      toolCalls: [
+        makeToolCall({ id: "tc-2", toolName: "bash", status: "running" }),
+      ],
+    });
+    const a3 = makeMessage({
+      stableId: "a3",
+      role: "assistant",
+      content: "done",
+      timestamp: 300,
+    });
+    const result = sanitizeDisplayMessages([a1, a2, a3]);
+    expect(result[0]!.toolCalls![0]!.status).toBe("error");
+    expect(result[1]!.toolCalls![0]!.status).toBe("error");
+    expect(result[2]).toBe(a3);
+  });
+
+  test("does not mutate the input messages or tool-call objects", () => {
+    const tc = makeToolCall({ id: "tc", toolName: "bash", status: "running" });
+    const older = makeMessage({
+      stableId: "a-old",
+      role: "assistant",
+      timestamp: 100,
+      toolCalls: [tc],
+    });
+    const later = makeMessage({
+      stableId: "a-new",
+      role: "assistant",
+      content: "ok",
+      timestamp: 200,
+    });
+    sanitizeDisplayMessages([older, later]);
+    expect(tc.status).toBe("running");
+    expect(tc.result).toBeUndefined();
+    expect(older.toolCalls![0]).toBe(tc);
+  });
+
+  test("preserves message identity when no tool calls are dangling", () => {
+    // The sort step always returns a new outer array, so the array-identity
+    // assertion lives at the *element* level: every message object must be
+    // the same reference. Confirms the repair step is COW at the message
+    // boundary when nothing needs patching.
+    const a1 = makeMessage({
+      stableId: "a1",
+      role: "assistant",
+      timestamp: 100,
+      toolCalls: [
+        makeToolCall({
+          id: "tc-1",
+          toolName: "bash",
+          status: "completed",
+          result: "ok",
+        }),
+      ],
+    });
+    const a2 = makeMessage({
+      stableId: "a2",
+      role: "assistant",
+      content: "done",
+      timestamp: 200,
+    });
+    const result = sanitizeDisplayMessages([a1, a2]);
+    expect(result[0]).toBe(a1);
+    expect(result[1]).toBe(a2);
+  });
+
+  test("empty array returns empty (no crashes from index math)", () => {
+    expect(sanitizeDisplayMessages([])).toEqual([]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Integration — all four hacks compose
 // ---------------------------------------------------------------------------
 
 describe("sanitizeDisplayMessages · integration", () => {
-  test("sort → invalid filter → trailing-dup drop runs in order", () => {
-    // Construct a messy input that exercises all three hacks at once.
+  test("sort → invalid filter → trailing-dup drop → dangling-tool repair runs in order", () => {
+    // Construct a messy input that exercises all four hacks at once.
     const phantom = makeMessage({
       stableId: "phantom",
       role: "user",
@@ -396,6 +669,17 @@ describe("sanitizeDisplayMessages · integration", () => {
       content: "What's the answer?",
       timestamp: 100,
     });
+    // An older assistant message with a running tool call — its `tool_result`
+    // event was lost in transit. We expect hack #4 to patch this.
+    const olderWithDangling = makeMessage({
+      stableId: "older",
+      role: "assistant",
+      textSegments: [{ type: "text", content: "let me check" }],
+      toolCalls: [
+        makeToolCall({ id: "tc-x", toolName: "bash", status: "running" }),
+      ],
+      timestamp: 150,
+    });
     // The "real" assistant turn (server-assigned id).
     const server = makeMessage({
       stableId: "server-abc",
@@ -416,12 +700,28 @@ describe("sanitizeDisplayMessages · integration", () => {
     // first; `server` precedes `orphan` in the input because the sort is
     // stable on equal timestamps and the production duplicate-emission
     // order is "server row first, orphan row second".
-    const result = sanitizeDisplayMessages([phantom, server, orphan, userTurn]);
+    const result = sanitizeDisplayMessages([
+      phantom,
+      server,
+      orphan,
+      olderWithDangling,
+      userTurn,
+    ]);
 
     // Expect:
     //   - phantom dropped by hack #2,
-    //   - rows sorted by timestamp (user → server → orphan after sort),
-    //   - trailing orphan dropped by hack #3.
-    expect(result.map((m) => m.stableId)).toEqual(["user", "server-abc"]);
+    //   - rows sorted by timestamp (user → olderWithDangling → server → orphan),
+    //   - trailing orphan dropped by hack #3 (matches `server` on text + tool calls),
+    //   - olderWithDangling's running tool call patched by hack #4 because
+    //     `server` is a later assistant.
+    expect(result.map((m) => m.stableId)).toEqual([
+      "user",
+      "older",
+      "server-abc",
+    ]);
+    const patchedTool = result[1]!.toolCalls![0]!;
+    expect(patchedTool.status).toBe("error");
+    expect(patchedTool.isError).toBe(true);
+    expect(patchedTool.result).toContain("client-side data loss");
   });
 });