Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
314 changes: 307 additions & 7 deletions apps/web/src/domains/chat/utils/sanitize-display-messages.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -375,12 +375,285 @@ describe("sanitizeDisplayMessages · drop trailing assistant duplicate", () => {
});

// ---------------------------------------------------------------------------
// Integration — all three hacks compose
// Hack #4 — repair dangling tool calls on older assistant messages
// ---------------------------------------------------------------------------

/**
 * Hack #4 — repair of dangling tool calls.
 *
 * Contract pinned by the cases below: a tool call that is still `running` on
 * an assistant message is rewritten to an error (with the SYNTHETIC result
 * text) only when a later assistant message exists. The last assistant
 * message, already-terminal tool calls, and the caller's input objects are
 * all expected to be left alone.
 */
describe("sanitizeDisplayMessages · repair dangling tool calls", () => {
  // Result text expected on a patched tool call.
  // NOTE(review): presumably mirrors a constant in the implementation — keep
  // the two in sync.
  const SYNTHETIC =
    "Tool call completed on the server, but the result never reached the client. Subsequent assistant activity confirms the tool returned — this is a client-side data loss, not a tool failure.";

  test("patches a running tool call on an older assistant when a later assistant exists", () => {
    const older = makeMessage({
      stableId: "a-old",
      role: "assistant",
      timestamp: 100,
      toolCalls: [
        makeToolCall({ id: "tc-1", toolName: "bash", status: "running" }),
      ],
    });
    const later = makeMessage({
      stableId: "a-new",
      role: "assistant",
      content: "follow-up",
      timestamp: 200,
    });
    const [patchedOld, untouchedNew] = sanitizeDisplayMessages([older, later]);
    expect(patchedOld!.toolCalls![0]).toEqual({
      id: "tc-1",
      toolName: "bash",
      input: {},
      status: "error",
      isError: true,
      result: SYNTHETIC,
    });
    // The later (last) assistant must come back as the very same object.
    expect(untouchedNew).toBe(later);
  });

  test("does NOT patch the last assistant — it could still be streaming", () => {
    const userMsg = makeMessage({
      stableId: "u",
      role: "user",
      content: "go",
      timestamp: 100,
    });
    const last = makeMessage({
      stableId: "a-last",
      role: "assistant",
      timestamp: 200,
      toolCalls: [
        makeToolCall({ id: "tc-1", toolName: "bash", status: "running" }),
      ],
    });
    const result = sanitizeDisplayMessages([userMsg, last]);
    expect(result[1]).toBe(last);
    expect(result[1]!.toolCalls![0]!.status).toBe("running");
  });

  test("does NOT patch when only a subsequent USER message exists (no assistant proof)", () => {
    const onlyAssistant = makeMessage({
      stableId: "a-only",
      role: "assistant",
      timestamp: 100,
      toolCalls: [
        makeToolCall({ id: "tc-1", toolName: "bash", status: "running" }),
      ],
    });
    const trailingUser = makeMessage({
      stableId: "u",
      role: "user",
      content: "ping",
      timestamp: 200,
    });
    const result = sanitizeDisplayMessages([onlyAssistant, trailingUser]);
    expect(result[0]).toBe(onlyAssistant);
    expect(result[0]!.toolCalls![0]!.status).toBe("running");
  });

  test("patches across an intervening user message", () => {
    const a1 = makeMessage({
      stableId: "a1",
      role: "assistant",
      timestamp: 100,
      toolCalls: [
        makeToolCall({ id: "tc-1", toolName: "bash", status: "running" }),
      ],
    });
    const u = makeMessage({
      stableId: "u",
      role: "user",
      content: "more",
      timestamp: 200,
    });
    const a2 = makeMessage({
      stableId: "a2",
      role: "assistant",
      content: "result",
      timestamp: 300,
    });
    const result = sanitizeDisplayMessages([a1, u, a2]);
    expect(result[0]!.toolCalls![0]!.status).toBe("error");
    expect(result[0]!.toolCalls![0]!.result).toBe(SYNTHETIC);
  });

  test("leaves `status: 'completed'` tool calls alone (not dangling)", () => {
    const older = makeMessage({
      stableId: "a-old",
      role: "assistant",
      timestamp: 100,
      toolCalls: [
        makeToolCall({
          id: "tc-1",
          toolName: "bash",
          status: "completed",
          result: "ok",
        }),
      ],
    });
    const later = makeMessage({
      stableId: "a-new",
      role: "assistant",
      content: "follow-up",
      timestamp: 200,
    });
    const result = sanitizeDisplayMessages([older, later]);
    // Sanity: nothing was dropped or added. (The previous check compared
    // `result` with itself and could never fail.)
    expect(result).toHaveLength(2);
    // No patching happened → each message is returned by reference.
    expect(result[0]).toBe(older);
    expect(result[1]).toBe(later);
  });

  test("leaves `status: 'error'` tool calls alone (already terminal)", () => {
    const older = makeMessage({
      stableId: "a-old",
      role: "assistant",
      timestamp: 100,
      toolCalls: [
        makeToolCall({
          id: "tc-1",
          toolName: "bash",
          status: "error",
          isError: true,
          result: "boom",
        }),
      ],
    });
    const later = makeMessage({
      stableId: "a-new",
      role: "assistant",
      content: "ok",
      timestamp: 200,
    });
    const result = sanitizeDisplayMessages([older, later]);
    expect(result[0]).toBe(older);
    expect(result[0]!.toolCalls![0]!.result).toBe("boom");
  });

  test("patches only the running tool, leaves siblings on the same message alone", () => {
    const older = makeMessage({
      stableId: "a-old",
      role: "assistant",
      timestamp: 100,
      toolCalls: [
        makeToolCall({
          id: "tc-1",
          toolName: "bash",
          status: "completed",
          result: "first ok",
        }),
        makeToolCall({ id: "tc-2", toolName: "web_search", status: "running" }),
        makeToolCall({
          id: "tc-3",
          toolName: "read_file",
          status: "completed",
          result: "third ok",
        }),
      ],
    });
    const later = makeMessage({
      stableId: "a-new",
      role: "assistant",
      content: "done",
      timestamp: 200,
    });
    const result = sanitizeDisplayMessages([older, later]);
    expect(result[0]!.toolCalls![0]!.result).toBe("first ok");
    expect(result[0]!.toolCalls![1]!.status).toBe("error");
    expect(result[0]!.toolCalls![1]!.isError).toBe(true);
    expect(result[0]!.toolCalls![1]!.result).toBe(SYNTHETIC);
    expect(result[0]!.toolCalls![2]!.result).toBe("third ok");
  });

  test("patches multiple older assistants in a row", () => {
    const a1 = makeMessage({
      stableId: "a1",
      role: "assistant",
      timestamp: 100,
      toolCalls: [
        makeToolCall({ id: "tc-1", toolName: "bash", status: "running" }),
      ],
    });
    const a2 = makeMessage({
      stableId: "a2",
      role: "assistant",
      timestamp: 200,
      toolCalls: [
        makeToolCall({ id: "tc-2", toolName: "bash", status: "running" }),
      ],
    });
    const a3 = makeMessage({
      stableId: "a3",
      role: "assistant",
      content: "done",
      timestamp: 300,
    });
    const result = sanitizeDisplayMessages([a1, a2, a3]);
    expect(result[0]!.toolCalls![0]!.status).toBe("error");
    expect(result[1]!.toolCalls![0]!.status).toBe("error");
    expect(result[2]).toBe(a3);
  });

  test("does not mutate the input messages or tool-call objects", () => {
    const tc = makeToolCall({ id: "tc", toolName: "bash", status: "running" });
    const older = makeMessage({
      stableId: "a-old",
      role: "assistant",
      timestamp: 100,
      toolCalls: [tc],
    });
    const later = makeMessage({
      stableId: "a-new",
      role: "assistant",
      content: "ok",
      timestamp: 200,
    });
    // Return value intentionally ignored: only the inputs are inspected.
    sanitizeDisplayMessages([older, later]);
    expect(tc.status).toBe("running");
    expect(tc.result).toBeUndefined();
    expect(older.toolCalls![0]).toBe(tc);
  });

  test("preserves message identity when no tool calls are dangling", () => {
    // The sort step always returns a new outer array, so the array-identity
    // assertion lives at the *element* level: every message object must be
    // the same reference. Confirms the repair step is COW at the message
    // boundary when nothing needs patching.
    const a1 = makeMessage({
      stableId: "a1",
      role: "assistant",
      timestamp: 100,
      toolCalls: [
        makeToolCall({
          id: "tc-1",
          toolName: "bash",
          status: "completed",
          result: "ok",
        }),
      ],
    });
    const a2 = makeMessage({
      stableId: "a2",
      role: "assistant",
      content: "done",
      timestamp: 200,
    });
    const result = sanitizeDisplayMessages([a1, a2]);
    expect(result[0]).toBe(a1);
    expect(result[1]).toBe(a2);
  });

  test("empty array returns empty (no crashes from index math)", () => {
    expect(sanitizeDisplayMessages([])).toEqual([]);
  });
});

// ---------------------------------------------------------------------------
// Integration — all four hacks compose
// ---------------------------------------------------------------------------

describe("sanitizeDisplayMessages · integration", () => {
test("sort → invalid filter → trailing-dup drop runs in order", () => {
// Construct a messy input that exercises all three hacks at once.
test("sort → invalid filter → trailing-dup drop → dangling-tool repair runs in order", () => {
// Construct a messy input that exercises all four hacks at once.
const phantom = makeMessage({
stableId: "phantom",
role: "user",
Expand All @@ -396,6 +669,17 @@ describe("sanitizeDisplayMessages · integration", () => {
content: "What's the answer?",
timestamp: 100,
});
// An older assistant message with a running tool call — its `tool_result`
// event was lost in transit. We expect hack #4 to patch this.
const olderWithDangling = makeMessage({
stableId: "older",
role: "assistant",
textSegments: [{ type: "text", content: "let me check" }],
toolCalls: [
makeToolCall({ id: "tc-x", toolName: "bash", status: "running" }),
],
timestamp: 150,
});
// The "real" assistant turn (server-assigned id).
const server = makeMessage({
stableId: "server-abc",
Expand All @@ -416,12 +700,28 @@ describe("sanitizeDisplayMessages · integration", () => {
// first; `server` precedes `orphan` in the input because the sort is
// stable on equal timestamps and the production duplicate-emission
// order is "server row first, orphan row second".
const result = sanitizeDisplayMessages([phantom, server, orphan, userTurn]);
const result = sanitizeDisplayMessages([
phantom,
server,
orphan,
olderWithDangling,
userTurn,
]);

// Expect:
// - phantom dropped by hack #2,
// - rows sorted by timestamp (user → server → orphan after sort),
// - trailing orphan dropped by hack #3.
expect(result.map((m) => m.stableId)).toEqual(["user", "server-abc"]);
// - rows sorted by timestamp (user → olderWithDangling → server → orphan),
// - trailing orphan dropped by hack #3 (matches `server` on text + tool calls),
// - olderWithDangling's running tool call patched by hack #4 because
// `server` is a later assistant.
expect(result.map((m) => m.stableId)).toEqual([
"user",
"older",
"server-abc",
]);
const patchedTool = result[1]!.toolCalls![0]!;
expect(patchedTool.status).toBe("error");
expect(patchedTool.isError).toBe(true);
expect(patchedTool.result).toContain("client-side data loss");
});
});
Loading