From eae85fadb659e51611d4bff78ed33f7599ce2db1 Mon Sep 17 00:00:00 2001 From: "vellum-apollo-bot[bot]" <242025090+vellum-apollo-bot[bot]@users.noreply.github.com> Date: Mon, 25 May 2026 14:37:06 +0000 Subject: [PATCH] fix(web): consolidate chat display-message hacks into sanitize pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Centralizes three short-term UI-boundary workarounds into a single `sanitizeDisplayMessages` helper: 1. sortByTimestamp — orders messages by createdAt. 2. removeInvalidMessages — drops phantom/blank rows. 3. removeDuplicateTrailingAssistant — drops a duplicate trailing assistant row when stableIds differ (`server-...` then `assistant-...`) but textSegments + toolCalls match. New hack patching a recent regression where the final assistant message renders twice. Each sub-method carries a doc-comment explaining the upstream bug it patches and the condition under which it can be removed. Pure — never mutates input. Callers updated: chat-route-content and debug-api#tail both run the full pipeline so tail() keeps mirroring the UI. Migrated the existing isInvalidMessage tests from build-items.test.ts to the new sanitize-display-messages.test.ts. --- .../chat/components/chat-route-content.tsx | 26 +- .../chat/transcript/build-items.test.ts | 92 +--- .../domains/chat/transcript/build-items.ts | 30 +- apps/web/src/domains/chat/utils/debug-api.ts | 9 +- .../utils/sanitize-display-messages.test.ts | 427 ++++++++++++++++++ .../chat/utils/sanitize-display-messages.ts | 184 ++++++++ 6 files changed, 644 insertions(+), 124 deletions(-) create mode 100644 apps/web/src/domains/chat/utils/sanitize-display-messages.test.ts create mode 100644 apps/web/src/domains/chat/utils/sanitize-display-messages.ts diff --git a/apps/web/src/domains/chat/components/chat-route-content.tsx b/apps/web/src/domains/chat/components/chat-route-content.tsx index 215acaa76cf..c161a72aa41 100644 --- a/apps/web/src/domains/chat/components/chat-route-content.tsx +++ b/apps/web/src/domains/chat/components/chat-route-content.tsx @@ -72,8 +72,9 @@ import { useDeployStore } from "@/domains/chat/deploy-store.js"; import { useInteractionStore } from "@/domains/interactions/interaction-store.js"; import type { SubagentEntry, SubagentState } from "@/domains/subagents/subagent-store.js"; import type { DisplayAttachment, DisplayMessage } from "@/domains/chat/utils/reconcile.js"; -import { sortedByTimestamp } from "@/domains/chat/utils/message-sorting.js"; + import { buildTranscriptItems } from "@/domains/chat/transcript/build-items.js"; +import { sanitizeDisplayMessages } from "@/domains/chat/utils/sanitize-display-messages.js"; import type { TranscriptPaginationState } from "@/domains/chat/transcript/types.js"; import type { HistoryPaginationResult } from "@/domains/chat/transcript/use-history-pagination.js"; import { @@ -774,20 +775,15 @@ export function ChatRouteContent({ const thinkingLabel = getThinkingStatusText(turnState); - // Defensive ascending-timestamp sort at the render boundary. The internal - // mutators (`setMessages` in stream handlers, `reconcile`, etc.) try to - // keep the array sorted, but a handful of paths can land rows out of - // order — multi-row server clusters with the same `daemonMessageId`, a - // late tool-result event for an earlier bubble, history pages stitched - // around an in-flight stream, etc. Sorting here guarantees the user - // always sees messages in chronological order, regardless of which write - // path landed last. The deeper fix (server-side cluster merging so the - // client never sees the fragmented rows) is tracked separately. - // - // `sortedByTimestamp` is stable: rows without a `timestamp` keep their - // original slot, and equal timestamps preserve insertion order, so - // streaming bubbles don't flicker. - const sortedMessages = useMemo(() => sortedByTimestamp(messages), [messages]); + // Single render-boundary cleanup pass. `sanitizeDisplayMessages` houses + // every "this shouldn't be necessary, but is" hack we apply before the + // transcript renders (timestamp sort, blank/phantom row filter, duplicate + // trailing assistant drop). See `sanitize-display-messages.ts` for the + // rationale and removal triggers for each sub-step. + const sortedMessages = useMemo( + () => sanitizeDisplayMessages(messages), + [messages], + ); const transcriptItems = useMemo( () => diff --git a/apps/web/src/domains/chat/transcript/build-items.test.ts b/apps/web/src/domains/chat/transcript/build-items.test.ts index 0f8efb65b09..26bf885551b 100644 --- a/apps/web/src/domains/chat/transcript/build-items.test.ts +++ b/apps/web/src/domains/chat/transcript/build-items.test.ts @@ -267,34 +267,14 @@ describe("buildTranscriptItems", () => { }); // --------------------------------------------------------------------------- - // Phantom tool-only message filter (ATL-659) + // Phantom tool-only message filter (ATL-659) — pass-through behaviour only // - // The daemon synthesises tool calls with `toolName === "unknown"` when a - // tool_result block has no matching tool_use (orphan). They arrive as - // empty user messages whose only payload is a list of unknown tool calls - // and would otherwise render as a confusing "Completed 1 step / Used - // unknown" chip. Drop them at the projection step. + // The drop-the-phantom logic now lives in `sanitizeDisplayMessages` and is + // exercised by `sanitize-display-messages.test.ts`. The tests below + // confirm that `buildTranscriptItems` does NOT mistakenly drop legitimate + // mixed-tool messages — the positive half of the original spec. // --------------------------------------------------------------------------- - test("phantom tool-only messages (all toolName === 'unknown') are dropped", () => { - const phantom = makeMessage({ - id: "m1", - role: "user", - content: "", - stableId: "s-phantom", - toolCalls: [ - { id: "tc-1", toolName: "unknown", input: {}, status: "completed", result: "orphan" }, - ], - }); - - const items = buildTranscriptItems({ - ...emptyInput(), - messages: [phantom], - }); - - expect(items).toHaveLength(0); - }); - test("mixed messages with unknown tool calls alongside content are kept", () => { const mixed = makeMessage({ id: "m1", @@ -407,65 +387,15 @@ describe("buildTranscriptItems", () => { }); // --------------------------------------------------------------------------- - // Blank user-row filter — pagination-boundary orphans + // Blank user-row filter — pass-through behaviour only // - // At a history-pagination boundary, the runtime keeps tool_result-only - // user rows even when their parent tool_use lives on a previous page - // (to avoid permanent data loss). `renderHistoryContent` then drops the - // orphan tool_result block, leaving the row on the wire with no content, - // no segments, no surfaces, no attachments, and no tool calls — a blank - // user bubble. The projection layer drops these so they don't render. + // The drop-blank-rows logic now lives in `sanitizeDisplayMessages` and is + // exercised by `sanitize-display-messages.test.ts`. The tests below + // confirm that `buildTranscriptItems` does NOT mistakenly drop legitimate + // user rows whose `content` is empty but which carry segments, surfaces, + // attachments, or are in a queued state — the positive half of the spec. // --------------------------------------------------------------------------- - test("truly blank user rows (no content, no segments, no surfaces, no attachments, no tool calls) are dropped", () => { - const blank = makeMessage({ - id: "m1", - role: "user", - content: "", - stableId: "s-blank-server", - }); - - const items = buildTranscriptItems({ - ...emptyInput(), - messages: [blank], - }); - - expect(items).toHaveLength(0); - }); - - test("blank user rows with whitespace-only content are dropped", () => { - const whitespace = makeMessage({ - id: "m1", - role: "user", - content: " \n\t ", - stableId: "s-blank-ws", - }); - - const items = buildTranscriptItems({ - ...emptyInput(), - messages: [whitespace], - }); - - expect(items).toHaveLength(0); - }); - - test("blank user rows with empty textSegments are dropped", () => { - const emptySegments = makeMessage({ - id: "m1", - role: "user", - content: "", - stableId: "s-empty-segments", - textSegments: [{ type: "text", content: "" }], - }); - - const items = buildTranscriptItems({ - ...emptyInput(), - messages: [emptySegments], - }); - - expect(items).toHaveLength(0); - }); - test("user rows with non-empty textSegments are kept (even if content is empty)", () => { // Some history paths populate textSegments instead of (or in addition to) // the flat content field — those rows are meaningful and must render. diff --git a/apps/web/src/domains/chat/transcript/build-items.ts b/apps/web/src/domains/chat/transcript/build-items.ts index 7785450887e..cd82a28ddb4 100644 --- a/apps/web/src/domains/chat/transcript/build-items.ts +++ b/apps/web/src/domains/chat/transcript/build-items.ts @@ -53,30 +53,10 @@ export interface BuildTranscriptItemsInput { * d. `ErrorItem` when `errorNotice` is a non-empty string. * * Every returned item carries a non-empty, distinct `key`. + * + * `messages` must already be sanitized — phantom/blank row filtering and + * trailing-duplicate drops happen upstream in `sanitizeDisplayMessages`. */ -function isInvalidMessage(message: DisplayMessage): boolean { - // Assistant rows always render; queued user rows collapse into a marker upstream. - if (message.role !== "user") return false; - if (message.queueStatus === "queued") return false; - - // Any meaningful signal short-circuits as valid. Without one of these the - // row is a blank bubble (e.g. an orphan tool_result at a pagination boundary - // that the daemon's renderer already stripped). - if (message.content && message.content.trim().length > 0) return false; - if ( - message.textSegments?.some( - (s) => typeof s.content === "string" && s.content.trim().length > 0, - ) - ) - return false; - if (message.surfaces && message.surfaces.length > 0) return false; - if (message.attachments && message.attachments.length > 0) return false; - if (message.slackMessage) return false; - if (message.toolCalls?.some((tc) => tc.toolName !== "unknown")) return false; - - return true; -} - export function buildTranscriptItems( input: BuildTranscriptItemsInput, ): TranscriptItem[] { @@ -105,10 +85,6 @@ export function buildTranscriptItems( continue; } - if (isInvalidMessage(message)) { - continue; - } - const isQueuedUser = message.role === "user" && message.queueStatus === "queued"; diff --git a/apps/web/src/domains/chat/utils/debug-api.ts b/apps/web/src/domains/chat/utils/debug-api.ts index 85b65b53bc4..16fb585d89e 100644 --- a/apps/web/src/domains/chat/utils/debug-api.ts +++ b/apps/web/src/domains/chat/utils/debug-api.ts @@ -39,6 +39,7 @@ import type { } from "@/domains/chat/types/chat-ui-types.js"; import { recordChatDiagnostic } from "@/domains/chat/utils/diagnostics.js"; import type { DisplayMessage } from "@/domains/chat/utils/reconcile.js"; +import { sanitizeDisplayMessages } from "@/domains/chat/utils/sanitize-display-messages.js"; import type { ReconcileActiveConversationResult } from "@/domains/chat/hooks/use-message-reconciliation.js"; import { classifyScrollPosition, @@ -379,7 +380,13 @@ export function createChatDebugApi(refs: ChatDebugRefs): ChatDebugApi { Number.isFinite(limit) && limit > 0 ? Math.floor(limit) : DEFAULT_TAIL_LIMIT; - const messages = refs.messagesRef.current ?? []; + // Sanitize so `tail()` returns the same array the UI ends up rendering + // (the chat-route render path also pipes `messages` through + // `sanitizeDisplayMessages`). Without this we'd surface trailing + // duplicates / blank rows that the UI is intentionally filtering out, + // which would be misleading when triaging "why does my chat look like X" + // reports. + const messages = sanitizeDisplayMessages(refs.messagesRef.current ?? []); const startIndex = Math.max(0, messages.length - safeLimit); return messages.slice(startIndex); } diff --git a/apps/web/src/domains/chat/utils/sanitize-display-messages.test.ts b/apps/web/src/domains/chat/utils/sanitize-display-messages.test.ts new file mode 100644 index 00000000000..3bdd80a1d8f --- /dev/null +++ b/apps/web/src/domains/chat/utils/sanitize-display-messages.test.ts @@ -0,0 +1,427 @@ +import { describe, expect, test } from "bun:test"; + +import { sanitizeDisplayMessages } from "@/domains/chat/utils/sanitize-display-messages.js"; +import type { DisplayMessage } from "@/domains/chat/types/types.js"; +import type { ChatMessageToolCall } from "@/domains/chat/api/event-types.js"; +import { newStableId } from "@/domains/chat/utils/stable-id.js"; + +function makeMessage( + overrides: Partial & { stableId?: string }, +): DisplayMessage { + return { + stableId: overrides.stableId ?? newStableId("test"), + role: "assistant", + content: "", + ...overrides, + }; +} + +function makeToolCall( + overrides: Partial & { id: string; toolName: string }, +): ChatMessageToolCall { + return { + input: {}, + status: "completed", + ...overrides, + }; +} + +describe("sanitizeDisplayMessages", () => { + test("returns the input unchanged when no sub-method fires", () => { + const messages: DisplayMessage[] = [ + makeMessage({ stableId: "u-1", role: "user", content: "hi", timestamp: 1 }), + makeMessage({ stableId: "a-1", role: "assistant", content: "hello", timestamp: 2 }), + ]; + const result = sanitizeDisplayMessages(messages); + expect(result.map((m) => m.stableId)).toEqual(["u-1", "a-1"]); + }); + + test("does not mutate the input array", () => { + const messages: DisplayMessage[] = [ + makeMessage({ stableId: "b", role: "assistant", content: "b", timestamp: 200 }), + makeMessage({ stableId: "a", role: "assistant", content: "a", timestamp: 100 }), + ]; + const snapshot = messages.map((m) => m.stableId); + sanitizeDisplayMessages(messages); + expect(messages.map((m) => m.stableId)).toEqual(snapshot); + }); +}); + +// --------------------------------------------------------------------------- +// Hack #1 — timestamp sort +// --------------------------------------------------------------------------- + +describe("sanitizeDisplayMessages · timestamp sort", () => { + // These tests intentionally interleave user / assistant rows so the trailing + // assistant-duplicate hack (#3) never fires and we observe the sort in + // isolation. + test("orders timestamped messages ascending", () => { + const messages: DisplayMessage[] = [ + makeMessage({ stableId: "c", role: "user", content: "c", timestamp: 300 }), + makeMessage({ stableId: "a", role: "user", content: "a", timestamp: 100 }), + makeMessage({ stableId: "b", role: "user", content: "b", timestamp: 200 }), + ]; + const result = sanitizeDisplayMessages(messages); + expect(result.map((m) => m.stableId)).toEqual(["a", "b", "c"]); + }); + + test("rows without a timestamp keep their original slot", () => { + const messages: DisplayMessage[] = [ + makeMessage({ stableId: "later-ts", role: "user", content: "x", timestamp: 200 }), + makeMessage({ stableId: "no-ts", role: "user", content: "y" }), + makeMessage({ stableId: "earlier-ts", role: "user", content: "z", timestamp: 100 }), + ]; + const result = sanitizeDisplayMessages(messages); + expect(result.map((m) => m.stableId)).toEqual([ + "earlier-ts", + "no-ts", + "later-ts", + ]); + }); +}); + +// --------------------------------------------------------------------------- +// Hack #2 — invalid (blank / phantom) row filter +// --------------------------------------------------------------------------- + +describe("sanitizeDisplayMessages · invalid row filter", () => { + test("drops blank user rows with no content / segments / surfaces / attachments / tool calls", () => { + const blank = makeMessage({ stableId: "blank", role: "user", content: "" }); + const real = makeMessage({ stableId: "real", role: "user", content: "hi", timestamp: 1 }); + const result = sanitizeDisplayMessages([blank, real]); + expect(result.map((m) => m.stableId)).toEqual(["real"]); + }); + + test("drops user rows with whitespace-only content", () => { + const whitespace = makeMessage({ + stableId: "whitespace", + role: "user", + content: " \n\t ", + }); + const result = sanitizeDisplayMessages([whitespace]); + expect(result).toEqual([]); + }); + + test("drops user rows whose textSegments are all empty strings", () => { + const emptySegments = makeMessage({ + stableId: "empty-segments", + role: "user", + content: "", + textSegments: [{ type: "text", content: "" }], + }); + const result = sanitizeDisplayMessages([emptySegments]); + expect(result).toEqual([]); + }); + + test("drops phantom tool-only user messages where every toolName === 'unknown'", () => { + const phantom = makeMessage({ + stableId: "phantom", + role: "user", + content: "", + toolCalls: [ + makeToolCall({ id: "tc-1", toolName: "unknown", result: "orphan" }), + ], + }); + const result = sanitizeDisplayMessages([phantom]); + expect(result).toEqual([]); + }); + + test("keeps user messages with mixed known + unknown tool calls", () => { + const mixed = makeMessage({ + stableId: "mixed", + role: "user", + content: "", + toolCalls: [ + makeToolCall({ id: "tc-1", toolName: "unknown", result: "orphan" }), + makeToolCall({ id: "tc-2", toolName: "bash", result: "file.txt" }), + ], + }); + const result = sanitizeDisplayMessages([mixed]); + expect(result.map((m) => m.stableId)).toEqual(["mixed"]); + }); + + test("never drops assistant rows even when they look 'empty'", () => { + const emptyAssistant = makeMessage({ + stableId: "empty-asst", + role: "assistant", + content: "", + }); + const result = sanitizeDisplayMessages([emptyAssistant]); + expect(result.map((m) => m.stableId)).toEqual(["empty-asst"]); + }); + + test("never drops queued user rows", () => { + const queued = makeMessage({ + stableId: "queued", + role: "user", + content: "", + queueStatus: "queued", + }); + const result = sanitizeDisplayMessages([queued]); + expect(result.map((m) => m.stableId)).toEqual(["queued"]); + }); +}); + +// --------------------------------------------------------------------------- +// Hack #3 — drop a duplicate trailing assistant message +// --------------------------------------------------------------------------- + +describe("sanitizeDisplayMessages · drop trailing assistant duplicate", () => { + test("drops a trailing assistant row that mirrors the previous one (the bug we're patching)", () => { + // Mirrors production failure: a "server-…" stableId row with `id` set is + // followed by an "assistant-…" stableId row with `id` undefined. + const server = makeMessage({ + stableId: "server-abc", + id: "msg-1", + role: "assistant", + content: "Final answer", + textSegments: [{ type: "text", content: "Final answer" }], + toolCalls: [ + makeToolCall({ id: "tc-1", toolName: "bash", result: "ok" }), + ], + timestamp: 1000, + }); + const orphan = makeMessage({ + stableId: "assistant-abc", + role: "assistant", + content: "Final answer", + textSegments: [{ type: "text", content: "Final answer" }], + toolCalls: [ + makeToolCall({ id: "tc-1", toolName: "bash", result: "ok" }), + ], + timestamp: 1000, + }); + + const result = sanitizeDisplayMessages([server, orphan]); + expect(result.map((m) => m.stableId)).toEqual(["server-abc"]); + }); + + test("keeps both rows when only one is the assistant", () => { + const user = makeMessage({ stableId: "u", role: "user", content: "hi", timestamp: 1 }); + const assistant = makeMessage({ + stableId: "a", + role: "assistant", + content: "hi", + timestamp: 2, + }); + const result = sanitizeDisplayMessages([user, assistant]); + expect(result.map((m) => m.stableId)).toEqual(["u", "a"]); + }); + + test("keeps both rows when textSegments differ", () => { + const first = makeMessage({ + stableId: "first", + role: "assistant", + textSegments: [{ type: "text", content: "Answer A" }], + timestamp: 1, + }); + const second = makeMessage({ + stableId: "second", + role: "assistant", + textSegments: [{ type: "text", content: "Answer B" }], + timestamp: 2, + }); + const result = sanitizeDisplayMessages([first, second]); + expect(result.map((m) => m.stableId)).toEqual(["first", "second"]); + }); + + test("keeps both rows when textSegments lengths differ", () => { + const first = makeMessage({ + stableId: "first", + role: "assistant", + textSegments: [{ type: "text", content: "Answer" }], + timestamp: 1, + }); + const second = makeMessage({ + stableId: "second", + role: "assistant", + textSegments: [ + { type: "text", content: "Answer" }, + { type: "text", content: "More" }, + ], + timestamp: 2, + }); + const result = sanitizeDisplayMessages([first, second]); + expect(result.map((m) => m.stableId)).toEqual(["first", "second"]); + }); + + test("keeps both rows when tool call toolName differs", () => { + const first = makeMessage({ + stableId: "first", + role: "assistant", + toolCalls: [makeToolCall({ id: "tc", toolName: "bash", result: "x" })], + timestamp: 1, + }); + const second = makeMessage({ + stableId: "second", + role: "assistant", + toolCalls: [makeToolCall({ id: "tc", toolName: "read", result: "x" })], + timestamp: 2, + }); + const result = sanitizeDisplayMessages([first, second]); + expect(result.map((m) => m.stableId)).toEqual(["first", "second"]); + }); + + test("keeps both rows when tool call result differs", () => { + const first = makeMessage({ + stableId: "first", + role: "assistant", + toolCalls: [makeToolCall({ id: "tc", toolName: "bash", result: "a" })], + timestamp: 1, + }); + const second = makeMessage({ + stableId: "second", + role: "assistant", + toolCalls: [makeToolCall({ id: "tc", toolName: "bash", result: "b" })], + timestamp: 2, + }); + const result = sanitizeDisplayMessages([first, second]); + expect(result.map((m) => m.stableId)).toEqual(["first", "second"]); + }); + + test("keeps both rows when tool call counts differ", () => { + const first = makeMessage({ + stableId: "first", + role: "assistant", + toolCalls: [makeToolCall({ id: "tc", toolName: "bash", result: "x" })], + timestamp: 1, + }); + const second = makeMessage({ + stableId: "second", + role: "assistant", + toolCalls: [ + makeToolCall({ id: "tc-a", toolName: "bash", result: "x" }), + makeToolCall({ id: "tc-b", toolName: "bash", result: "y" }), + ], + timestamp: 2, + }); + const result = sanitizeDisplayMessages([first, second]); + expect(result.map((m) => m.stableId)).toEqual(["first", "second"]); + }); + + test("does not look beyond the trailing pair", () => { + // Three identical assistant rows in a row — only the very last one is + // dropped, not the middle one. The hack is intentionally narrow. + const a = makeMessage({ + stableId: "a", + role: "assistant", + textSegments: [{ type: "text", content: "Same" }], + timestamp: 1, + }); + const b = makeMessage({ + stableId: "b", + role: "assistant", + textSegments: [{ type: "text", content: "Same" }], + timestamp: 2, + }); + const c = makeMessage({ + stableId: "c", + role: "assistant", + textSegments: [{ type: "text", content: "Same" }], + timestamp: 3, + }); + const result = sanitizeDisplayMessages([a, b, c]); + expect(result.map((m) => m.stableId)).toEqual(["a", "b"]); + }); + + test("handles two assistant rows with no tool calls and matching segments", () => { + const first = makeMessage({ + stableId: "first", + role: "assistant", + textSegments: [{ type: "text", content: "Hi" }], + timestamp: 1, + }); + const second = makeMessage({ + stableId: "second", + role: "assistant", + textSegments: [{ type: "text", content: "Hi" }], + timestamp: 2, + }); + const result = sanitizeDisplayMessages([first, second]); + expect(result.map((m) => m.stableId)).toEqual(["first"]); + }); + + test("handles two assistant rows with no segments and matching tool calls", () => { + const first = makeMessage({ + stableId: "first", + role: "assistant", + toolCalls: [makeToolCall({ id: "tc", toolName: "bash", result: "x" })], + timestamp: 1, + }); + const second = makeMessage({ + stableId: "second", + role: "assistant", + toolCalls: [makeToolCall({ id: "tc", toolName: "bash", result: "x" })], + timestamp: 2, + }); + const result = sanitizeDisplayMessages([first, second]); + expect(result.map((m) => m.stableId)).toEqual(["first"]); + }); + + test("single-message arrays are returned unchanged", () => { + const only = makeMessage({ + stableId: "only", + role: "assistant", + content: "lonely", + timestamp: 1, + }); + const result = sanitizeDisplayMessages([only]); + expect(result.map((m) => m.stableId)).toEqual(["only"]); + }); + + test("empty arrays are returned unchanged", () => { + expect(sanitizeDisplayMessages([])).toEqual([]); + }); +}); + +// --------------------------------------------------------------------------- +// Integration — all three hacks compose +// --------------------------------------------------------------------------- + +describe("sanitizeDisplayMessages · integration", () => { + test("sort → invalid filter → trailing-dup drop runs in order", () => { + // Construct a messy input that exercises all three hacks at once. + const phantom = makeMessage({ + stableId: "phantom", + role: "user", + content: "", + toolCalls: [ + makeToolCall({ id: "p", toolName: "unknown", result: "orphan" }), + ], + timestamp: 50, + }); + const userTurn = makeMessage({ + stableId: "user", + role: "user", + content: "What's the answer?", + timestamp: 100, + }); + // The "real" assistant turn (server-assigned id). + const server = makeMessage({ + stableId: "server-abc", + id: "msg-1", + role: "assistant", + textSegments: [{ type: "text", content: "42" }], + timestamp: 200, + }); + // The duplicate orphan emission (no id, "assistant-…" stableId). + const orphan = makeMessage({ + stableId: "assistant-abc", + role: "assistant", + textSegments: [{ type: "text", content: "42" }], + timestamp: 200, + }); + + // Insertion order is intentionally jumbled to make sure the sort runs + // first; `server` precedes `orphan` in the input because the sort is + // stable on equal timestamps and the production duplicate-emission + // order is "server row first, orphan row second". + const result = sanitizeDisplayMessages([phantom, server, orphan, userTurn]); + + // Expect: + // - phantom dropped by hack #2, + // - rows sorted by timestamp (user → server → orphan after sort), + // - trailing orphan dropped by hack #3. + expect(result.map((m) => m.stableId)).toEqual(["user", "server-abc"]); + }); +}); diff --git a/apps/web/src/domains/chat/utils/sanitize-display-messages.ts b/apps/web/src/domains/chat/utils/sanitize-display-messages.ts new file mode 100644 index 00000000000..8733ba0dac7 --- /dev/null +++ b/apps/web/src/domains/chat/utils/sanitize-display-messages.ts @@ -0,0 +1,184 @@ +// ----------------------------------------------------------------------------- +// sanitizeDisplayMessages — single home for "this shouldn't be necessary, +// but is" frontend cleanup applied to DisplayMessage[] before the transcript +// renders (and before `window._vellumDebug.chat.tail()` returns). +// +// Every sub-method below patches over an upstream issue. They are SHORT TERM +// and should be removed as the assistant backend stabilises the corresponding +// emission behaviour. Keeping them all in one file means we only have to look +// in one place when a render-layer "why am I seeing X" report lands, and we +// only have to delete one file when the backend is fixed. +// ----------------------------------------------------------------------------- + +import { sortedByTimestamp } from "@/domains/chat/utils/message-sorting.js"; +import type { DisplayMessage } from "@/domains/chat/types/types.js"; + +/** + * Apply every render-boundary hack to `messages` in a fixed order and return + * a new array (never mutates the input). + * + * Pipeline: + * 1. `sortByTimestamp` (Hack #1) + * 2. `removeInvalidMessages` (Hack #2) + * 3. `removeDuplicateTrailingAssistant` (Hack #3) + * + * Each step is independent — removing any one when the corresponding upstream + * bug is fixed is a one-line edit. + */ +export function sanitizeDisplayMessages( + messages: DisplayMessage[], +): DisplayMessage[] { + let result = sortByTimestamp(messages); + result = removeInvalidMessages(result); + result = removeDuplicateTrailingAssistant(result); + return result; +} + +// ----------------------------------------------------------------------------- +// Hack #1 — defensive ascending-timestamp sort at the render boundary +// ----------------------------------------------------------------------------- +// Why it exists: the internal mutators (stream handlers, reconcile, message +// merge, etc.) try to keep `messages` sorted, but several paths can land +// rows out of order: +// - multi-row server clusters that share the same `daemonMessageId`, +// - late `tool_result` events for an earlier bubble, +// - history pages stitched around an in-flight stream. +// Sorting here guarantees the user always sees messages in chronological +// order regardless of which write path landed last. +// +// `sortedByTimestamp` is stable: rows without a `timestamp` keep their +// original slot, and equal timestamps preserve insertion order — so +// streaming bubbles don't flicker. +// +// SHORT TERM until: the assistant backend merges multi-row clusters +// server-side so the client never sees the fragmented rows. +// ----------------------------------------------------------------------------- +function sortByTimestamp(messages: DisplayMessage[]): DisplayMessage[] { + return sortedByTimestamp(messages); +} + +// ----------------------------------------------------------------------------- +// Hack #2 — drop blank / phantom user rows +// ----------------------------------------------------------------------------- +// Why it exists: two upstream emission patterns leave us with user rows that +// have nothing to render: +// - At a history-pagination boundary the runtime keeps `tool_result`-only +// user rows even when their parent `tool_use` lives on a previous page +// (to avoid permanent data loss). The daemon's renderer then drops the +// orphan `tool_result` block, leaving a blank user bubble on the wire. +// - The daemon synthesises tool calls with `toolName === "unknown"` when a +// `tool_result` has no matching `tool_use`. Those arrive as empty user +// messages whose only payload is a list of "unknown" tools and would +// otherwise render as a confusing "Used unknown" chip (ATL-659). +// +// SHORT TERM until: the daemon stops emitting orphan `tool_result` rows and +// phantom unknown-tool placeholders at history boundaries. +// ----------------------------------------------------------------------------- +function removeInvalidMessages(messages: DisplayMessage[]): DisplayMessage[] { + let result: DisplayMessage[] | null = null; + for (let i = 0; i < messages.length; i++) { + const m = messages[i]!; + if (isInvalidMessage(m)) { + if (!result) result = messages.slice(0, i); + continue; + } + if (result) result.push(m); + } + return result ?? messages; +} + +function isInvalidMessage(message: DisplayMessage): boolean { + // Assistant rows always render; queued user rows collapse into a marker upstream. + if (message.role !== "user") return false; + if (message.queueStatus === "queued") return false; + + // Any meaningful signal short-circuits as valid. Without one of these the + // row is a blank bubble (e.g. an orphan tool_result at a pagination boundary + // that the daemon's renderer already stripped). + if (message.content && message.content.trim().length > 0) return false; + if ( + message.textSegments?.some( + (s) => typeof s.content === "string" && s.content.trim().length > 0, + ) + ) + return false; + if (message.surfaces && message.surfaces.length > 0) return false; + if (message.attachments && message.attachments.length > 0) return false; + if (message.slackMessage) return false; + if (message.toolCalls?.some((tc) => tc.toolName !== "unknown")) return false; + + return true; +} + +// ----------------------------------------------------------------------------- +// Hack #3 — drop a duplicate trailing assistant message +// ----------------------------------------------------------------------------- +// Why it exists: occasionally the daemon emits two rows for what is logically +// the same assistant turn — one with a server-assigned `id` and a `stableId` +// of the form "server-…", followed immediately by a sibling row with `id` +// undefined and a `stableId` of the form "assistant-…". The existing dedupe +// keys (`id` and `stableId`) both miss this case because both fields differ +// between the rows. Without this filter the UI renders the final assistant +// message twice (and `window._vellumDebug.chat.tail()` returns it twice). +// +// Predicate (must ALL hold to filter): +// - the last two messages are both `role: "assistant"`, +// - the trailing row has SOMETHING substantive to render — at least one of +// `textSegments`/`toolCalls` is non-empty. Guards against accidentally +// dropping two empty placeholders that happen to be sequentially equal, +// - their `textSegments` arrays match position-for-position on `type` and +// `content`, +// - their `toolCalls` arrays match position-for-position on `toolName` and +// `result`. +// When all four hold, drop the trailing row. +// +// SHORT TERM until: the assistant backend root-causes the duplicate emission +// (a parallel investigation owns the deeper fix). +// ----------------------------------------------------------------------------- +function removeDuplicateTrailingAssistant( + messages: DisplayMessage[], +): DisplayMessage[] { + if (messages.length < 2) return messages; + + const last = messages[messages.length - 1]!; + const prev = messages[messages.length - 2]!; + + if (last.role !== "assistant" || prev.role !== "assistant") return messages; + if (!hasSubstantiveContent(last)) return messages; + if (!textSegmentsMatch(prev, last)) return messages; + if (!toolCallsMatch(prev, last)) return messages; + + return messages.slice(0, -1); +} + +function hasSubstantiveContent(message: DisplayMessage): boolean { + if (message.textSegments && message.textSegments.length > 0) return true; + if (message.toolCalls && message.toolCalls.length > 0) return true; + return false; +} + +function textSegmentsMatch(a: DisplayMessage, b: DisplayMessage): boolean { + const aSegs = a.textSegments ?? []; + const bSegs = b.textSegments ?? []; + if (aSegs.length !== bSegs.length) return false; + for (let i = 0; i < aSegs.length; i++) { + const aSeg = aSegs[i]!; + const bSeg = bSegs[i]!; + if (aSeg.type !== bSeg.type) return false; + if (aSeg.content !== bSeg.content) return false; + } + return true; +} + +function toolCallsMatch(a: DisplayMessage, b: DisplayMessage): boolean { + const aTcs = a.toolCalls ?? []; + const bTcs = b.toolCalls ?? []; + if (aTcs.length !== bTcs.length) return false; + for (let i = 0; i < aTcs.length; i++) { + const aTc = aTcs[i]!; + const bTc = bTcs[i]!; + if (aTc.toolName !== bTc.toolName) return false; + if (aTc.result !== bTc.result) return false; + } + return true; +}