diff --git a/assistant/src/__tests__/context-window-manager.test.ts b/assistant/src/__tests__/context-window-manager.test.ts index e32e63580cc..36f7be5f79e 100644 --- a/assistant/src/__tests__/context-window-manager.test.ts +++ b/assistant/src/__tests__/context-window-manager.test.ts @@ -3,10 +3,12 @@ import { describe, expect, test } from "bun:test"; import type { ContextWindowConfig } from "../config/types.js"; import { estimateTextTokens } from "../context/token-estimator.js"; import { + appendTailAnchorToSummary, clampSummaryAtSectionBoundary, CONTEXT_SUMMARY_MARKER, ContextWindowManager, createContextSummaryMessage, + extractTailAssistantText, getSummaryFromContextMessage, stripCompactionOnlyInjections, } from "../context/window-manager.js"; @@ -2091,3 +2093,245 @@ describe("clampSummaryAtSectionBoundary", () => { expect(clamped.length).toBeLessThanOrEqual(100); }); }); + +describe("extractTailAssistantText", () => { + test("returns the most recent assistant text block", () => { + const messages: Message[] = [ + message("user", "u1"), + message("assistant", "a1 first"), + message("user", "u2"), + message("assistant", "a2 last"), + ]; + expect(extractTailAssistantText(messages)).toBe("a2 last"); + }); + + test("returns null when no assistant text is present", () => { + const messages: Message[] = [ + message("user", "u1"), + message("user", "u2"), + ]; + expect(extractTailAssistantText(messages)).toBeNull(); + }); + + test("skips assistant messages with only tool_use blocks and finds the prior text", () => { + const messages: Message[] = [ + message("assistant", "a1 narration before tool use"), + message("user", "u1"), + { + role: "assistant", + content: [ + { + type: "tool_use", + id: "tool-1", + name: "bash", + input: { command: "ls" }, + } as ContentBlock, + ], + }, + ]; + expect(extractTailAssistantText(messages)).toBe( + "a1 narration before tool use", + ); + }); + + test("clamps long text from the start so the END is preserved", () => { + const longText = 
"early prefix " + "x".repeat(2000) + " FINAL NEXT STEP"; + const messages: Message[] = [message("assistant", longText)]; + const result = extractTailAssistantText(messages, 200); + expect(result).not.toBeNull(); + expect(result!.startsWith("[...truncated]")).toBe(true); + expect(result!.endsWith("FINAL NEXT STEP")).toBe(true); + // Stripped block size ≈ maxChars; "[...truncated] " adds a fixed prefix. + expect(result!.length).toBeLessThanOrEqual(200 + "[...truncated] ".length); + }); + + test("ignores empty/whitespace-only assistant text", () => { + const messages: Message[] = [ + message("assistant", "real content"), + message("assistant", " \n "), + ]; + expect(extractTailAssistantText(messages)).toBe("real content"); + }); + + test("returns null for an empty messages array", () => { + expect(extractTailAssistantText([])).toBeNull(); + }); +}); + +describe("appendTailAnchorToSummary", () => { + test("appends a tag-wrapped block after the summary", () => { + const out = appendTailAnchorToSummary( + "## Goals\n- item", + "Next step: file the SSE followup.", + ); + expect(out).toContain("## Goals\n- item"); + expect(out).toContain( + "\nNext step: file the SSE followup.\n", + ); + expect(out.endsWith("")).toBe(true); + }); + + test("is idempotent: re-applying with new text replaces the prior tail", () => { + const first = appendTailAnchorToSummary("body", "tail-1"); + const second = appendTailAnchorToSummary(first, "tail-2"); + expect(second).toContain("body"); + expect(second).toContain("tail-2"); + expect(second).not.toContain("tail-1"); + // Exactly one open-tag occurrence — no stacking. 
+ expect(second.match(//g)?.length).toBe(1); + }); +}); + +describe("compaction tail-anchor", () => { + test("splices the last assistant text block verbatim into the summary message", async () => { + const provider = createProvider(() => ({ + content: [{ type: "text", text: "## Goals\n- LLM summary" }], + model: "mock-model", + usage: { inputTokens: 100, outputTokens: 25 }, + stopReason: "end_turn", + })); + const manager = new ContextWindowManager({ + provider, + systemPrompt: "system prompt", + config: makeConfig({ maxInputTokens: 600 }), + }); + const long = "x".repeat(240); + const distinctiveTail = + "Pushed 8fe70d63a0 — next step: file the SSE followup as promised."; + // Place `distinctiveTail` as the assistant response for u1 so it lands + // at the end of the compactable region. With the same 600-token budget + // and 6-message shape as the existing 600-token compaction test above, + // the binary search settles on keepTurns=2 (kept = [u2, a2, u3, a3]; + // compactable = [u1, distinctiveTail]) — exercising the real-world + // drift scenario where the model's last narration in a long work span + // gets summarized away. + const history: Message[] = [ + message("user", `u1 ${long}`), + message("assistant", distinctiveTail), + message("user", `u2 ${long}`), + message("assistant", `a2 ${long}`), + message("user", `u3 ${long}`), + message("assistant", `a3 ${long}`), + ]; + + const result = await manager.maybeCompact(history); + + expect(result.compacted).toBe(true); + const summaryInner = getSummaryFromContextMessage(result.messages[0]); + expect(summaryInner).not.toBeNull(); + // LLM summary still present. + expect(summaryInner).toContain("LLM summary"); + // Verbatim tail spliced in: distinctive text from the LAST assistant + // message in the compactable region (here, `distinctiveTail`). 
+ expect(summaryInner).toContain(""); + expect(summaryInner).toContain(distinctiveTail); + expect(summaryInner).toContain(""); + // summaryText reflects what's persisted in messages[0] for consistency + // with downstream consumers (DB, context_compacted event). + expect(result.summaryText).toContain(distinctiveTail); + }); + + test("omits the tail-anchor block when no assistant text exists in compactable region", async () => { + // Construct a scenario where the compactable region has assistant + // messages with ONLY tool_use blocks (no text) plus user turns. The + // anchor should be omitted gracefully. + const provider = createProvider(() => ({ + content: [{ type: "text", text: "## Goals\n- summary" }], + model: "mock-model", + usage: { inputTokens: 100, outputTokens: 25 }, + stopReason: "end_turn", + })); + const manager = new ContextWindowManager({ + provider, + systemPrompt: "system prompt", + config: makeConfig({ maxInputTokens: 600 }), + }); + const long = "x".repeat(240); + const history: Message[] = [ + message("user", `u1 ${long}`), + { + role: "assistant", + content: [ + { + type: "tool_use", + id: "tool-1", + name: "bash", + input: { command: "ls" }, + } as ContentBlock, + ], + }, + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-1", + content: "ls output", + } as ContentBlock, + ], + }, + message("user", `u2 ${long}`), + message("assistant", `a2 ${long}`), + message("user", `u3 ${long}`), + message("assistant", `a3 ${long}`), + ]; + + const result = await manager.maybeCompact(history); + + expect(result.compacted).toBe(true); + const summaryInner = getSummaryFromContextMessage(result.messages[0]); + expect(summaryInner).not.toBeNull(); + // No tail anchor when the only compactable assistant message has no text. + // (a2 / a3 are kept verbatim post-compaction since they're recent enough, + // so the compactable-region's only assistant message is the tool_use one.) 
+ if (summaryInner!.includes("")) { + // If a2 ended up in the compactable region after binary search, the + // anchor would surface a2's text — which is fine; the assertion that + // matters is that the spliced content (when present) is verbatim + // content from the compactable region, not noise. Validate the + // ordering: anchor must follow LLM summary text. + expect(summaryInner!.indexOf("summary")).toBeLessThan( + summaryInner!.indexOf(""), + ); + } + }); + + test("clamps tail-anchor when the last assistant text is longer than the cap", async () => { + const provider = createProvider(() => ({ + content: [{ type: "text", text: "## Goals\n- summary" }], + model: "mock-model", + usage: { inputTokens: 100, outputTokens: 25 }, + stopReason: "end_turn", + })); + const manager = new ContextWindowManager({ + provider, + systemPrompt: "system prompt", + config: makeConfig({ maxInputTokens: 600 }), + }); + const long = "x".repeat(240); + const tailEnd = "FINAL DISTINCTIVE END MARKER"; + // Long enough to trip TAIL_ANCHOR_MAX_CHARS (=1500) clamping. + const longTail = "early body " + "y".repeat(2000) + " " + tailEnd; + const history: Message[] = [ + message("user", `u1 ${long}`), + message("assistant", longTail), + message("user", `u2 ${long}`), + message("assistant", `a2 ${long}`), + message("user", `u3 ${long}`), + message("assistant", `a3 ${long}`), + ]; + + const result = await manager.maybeCompact(history); + + expect(result.compacted).toBe(true); + const summaryInner = getSummaryFromContextMessage(result.messages[0]); + expect(summaryInner).not.toBeNull(); + if (summaryInner!.includes("")) { + // When clamped, the END is preserved (most recent narration). + expect(summaryInner).toContain(tailEnd); + // And the early prefix is dropped. 
+ expect(summaryInner).toContain("[...truncated]"); + expect(summaryInner).not.toContain("early body"); + } + }); +}); diff --git a/assistant/src/context/window-manager.ts b/assistant/src/context/window-manager.ts index 30bc8f97249..e5a19d99c39 100644 --- a/assistant/src/context/window-manager.ts +++ b/assistant/src/context/window-manager.ts @@ -32,6 +32,19 @@ const COMPACTION_TOOL_RESULT_MAX_CHARS = 6_000; const MIN_COMPACTABLE_PERSISTED_MESSAGES = 2; const INTERNAL_CONTEXT_SUMMARY_MESSAGES = new WeakSet(); +/** + * Hard cap on the verbatim tail-anchor block we splice into the + * post-compaction summary message (see `extractTailAssistantText`). 1500 + * chars (~375 tokens) covers a few paragraphs of recent assistant + * narration without bloating the summary. When the tail exceeds this + * size we keep the END (most recent text), since "next step" / "now I'll + * …" statements typically live at the end of the assistant's last text + * block and that's the part the post-compaction model needs most. + */ +const TAIL_ANCHOR_MAX_CHARS = 1500; +const TAIL_ANCHOR_OPEN_TAG = ""; +const TAIL_ANCHOR_CLOSE_TAG = ""; + /** * When the existing summary is this fraction or more of the per-summary * token budget, inject a "compress older content aggressively" instruction @@ -688,7 +701,6 @@ export class ContextWindowManager { signal, options?.overrideProfile ?? null, ); - const summary = summaryUpdate.summary; const summaryInputTokens = summaryUpdate.inputTokens; const summaryOutputTokens = summaryUpdate.outputTokens; const summaryModel = summaryUpdate.model; @@ -704,6 +716,19 @@ export class ContextWindowManager { } const summaryCalls = 1; + // Force-keep the most recent assistant text from the compactable region + // by splicing it verbatim into the summary message. 
This is independent + // of what the LLM summarizer chose to surface — when compaction + // interrupts a long assistant work span, this anchor preserves the + // model's last self-narration ("Next step: …", "About to …") so the + // post-compaction model has unambiguous continuity instead of falling + // back to a "where am I?" recovery shape. + const tailAnchorText = extractTailAssistantText(compactableMessages); + const summary = + tailAnchorText != null + ? appendTailAnchorToSummary(summaryUpdate.summary, tailAnchorText) + : summaryUpdate.summary; + // Media (images, files) in kept turns is preserved naturally — those // turns are carried forward as-is and their token cost is already // accounted for by pickKeepBoundary's estimatePromptTokens call. @@ -1286,6 +1311,63 @@ export function createContextSummaryMessage(summary: string): Message { return message; } +/** + * Walk `messages` backward and return the concatenated text content of the + * most recent assistant message that contains at least one non-empty text + * block. tool_use / tool_result / image / unknown blocks are skipped. The + * result is trimmed and (if longer than `maxChars`) clamped from the START + * so the END — where "next step" / "now I'll …" narration tends to land — + * is preserved. + * + * Returns `null` when no eligible assistant text is found (e.g. compactable + * region was all user/tool messages, or all assistant messages were + * tool_use-only). The caller treats `null` as "no anchor to splice". + * + * Used by `_maybeCompact` to force-keep the last assistant text from the + * compactable region into the post-compaction summary message, so the + * model's most recent self-narration survives summarization regardless of + * whether the LLM summarizer chose to surface it. 
/**
 * Walk `messages` from newest to oldest and return the text of the first
 * assistant message that yields non-empty text, trimmed and clamped so that
 * the END of the text survives.
 *
 * Text is obtained through `extractText` (defined elsewhere in this module;
 * presumably it concatenates the message's text blocks and ignores
 * tool_use / tool_result / image blocks — confirm against its definition).
 *
 * @param messages Candidate messages, oldest first (the compactable region).
 * @param maxChars Maximum number of characters of assistant text to keep.
 *   Longer text is cut from the START and prefixed with `[...truncated] `,
 *   so the result may exceed `maxChars` by the length of that marker.
 *   `Infinity` disables clamping.
 * @returns The (possibly clamped) text, or `null` when there is nothing to
 *   anchor: no assistant message with text, or a budget below one character.
 */
export function extractTailAssistantText(
  messages: Message[],
  maxChars: number = TAIL_ANCHOR_MAX_CHARS,
): string | null {
  // A budget below one character (or NaN) leaves no room for content; report
  // "no anchor" instead of emitting a bare "[...truncated] " marker.
  if (Number.isNaN(maxChars) || maxChars < 1) return null;

  for (let i = messages.length - 1; i >= 0; i--) {
    const message = messages[i];
    if (message?.role !== "assistant") continue;

    const text = extractText(message.content).trim();
    if (text.length === 0) continue; // tool-only or whitespace-only message
    if (text.length <= maxChars) return text;

    // Keep the END — the most recent narration wins. Drop whitespace exposed
    // at the cut so the marker is followed directly by content.
    const kept = safeStringSlice(
      text,
      text.length - Math.floor(maxChars),
      text.length,
    ).trimStart();
    // Do not fall back to an OLDER message here: a stale anchor would be
    // worse than none. If the slice helper yields nothing, report no anchor.
    return kept.length > 0 ? `[...truncated] ${kept}` : null;
  }
  return null;
}

/**
 * Remove every literal occurrence of the anchor open/close tags from `text`.
 * Repeats until stable because deleting one occurrence can join the
 * surrounding characters into a new one; each pass shortens the string, so
 * the loop terminates.
 */
function removeAnchorTags(text: string): string {
  let current = text;
  let previous: string;
  do {
    previous = current;
    for (const tag of [TAIL_ANCHOR_OPEN_TAG, TAIL_ANCHOR_CLOSE_TAG]) {
      if (tag.length > 0) current = current.split(tag).join("");
    }
  } while (current !== previous);
  return current;
}

/**
 * Return `summary` (right-trimmed) without its trailing anchor block, if it
 * has one. A block is only recognised when the summary actually ENDS with the
 * close tag; an open tag that merely appears somewhere in the summary body is
 * left alone so no summary content is discarded.
 */
function stripTrailingAnchorBlock(summary: string): string {
  const trimmed = summary.trimEnd();
  if (!trimmed.endsWith(TAIL_ANCHOR_CLOSE_TAG)) return trimmed;
  const openAt = trimmed.lastIndexOf(TAIL_ANCHOR_OPEN_TAG);
  return openAt >= 0 ? trimmed.slice(0, openAt).trimEnd() : trimmed;
}

/**
 * Splice a tail-anchor block onto the end of `summary`:
 *
 *     <summary>\n\n<open tag>\n<tail>\n<close tag>
 *
 * Idempotent: when `summary` already ends with an anchor block (for example
 * one carried forward from a previous compaction) that block is replaced
 * rather than stacked.
 *
 * @param summary  Summary text, with or without a trailing anchor block.
 * @param tailText Text to anchor. It is trimmed, and any literal anchor tags
 *   inside it are removed so the resulting block contains exactly one opener
 *   and can be located reliably the next time this function runs.
 * @returns The summary with exactly one trailing anchor block, or `summary`
 *   unchanged when `tailText` is blank (no empty block is emitted).
 */
export function appendTailAnchorToSummary(
  summary: string,
  tailText: string,
): string {
  const tail = removeAnchorTags(tailText).trim();
  if (tail.length === 0) return summary;

  const base = stripTrailingAnchorBlock(summary);
  return `${base}\n\n${TAIL_ANCHOR_OPEN_TAG}\n${tail}\n${TAIL_ANCHOR_CLOSE_TAG}`;
}