diff --git a/assistant/src/__tests__/context-window-manager.test.ts b/assistant/src/__tests__/context-window-manager.test.ts
index e32e63580cc..36f7be5f79e 100644
--- a/assistant/src/__tests__/context-window-manager.test.ts
+++ b/assistant/src/__tests__/context-window-manager.test.ts
@@ -3,10 +3,12 @@ import { describe, expect, test } from "bun:test";
import type { ContextWindowConfig } from "../config/types.js";
import { estimateTextTokens } from "../context/token-estimator.js";
import {
+ appendTailAnchorToSummary,
clampSummaryAtSectionBoundary,
CONTEXT_SUMMARY_MARKER,
ContextWindowManager,
createContextSummaryMessage,
+ extractTailAssistantText,
getSummaryFromContextMessage,
stripCompactionOnlyInjections,
} from "../context/window-manager.js";
@@ -2091,3 +2093,245 @@ describe("clampSummaryAtSectionBoundary", () => {
expect(clamped.length).toBeLessThanOrEqual(100);
});
});
+
+describe("extractTailAssistantText", () => {
+  test("returns the most recent assistant text block", () => {
+    const transcript: Message[] = [
+      message("user", "u1"),
+      message("assistant", "a1 first"),
+      message("user", "u2"),
+      message("assistant", "a2 last"),
+    ];
+    const tail = extractTailAssistantText(transcript);
+    expect(tail).toBe("a2 last");
+  });
+
+  test("returns null when no assistant text is present", () => {
+    const userOnly: Message[] = [message("user", "u1"), message("user", "u2")];
+    expect(extractTailAssistantText(userOnly)).toBeNull();
+  });
+
+  test("skips assistant messages with only tool_use blocks and finds the prior text", () => {
+    const toolOnlyTurn: Message = {
+      role: "assistant",
+      content: [
+        {
+          type: "tool_use",
+          id: "tool-1",
+          name: "bash",
+          input: { command: "ls" },
+        } as ContentBlock,
+      ],
+    };
+    const transcript: Message[] = [
+      message("assistant", "a1 narration before tool use"),
+      message("user", "u1"),
+      toolOnlyTurn,
+    ];
+    const tail = extractTailAssistantText(transcript);
+    expect(tail).toBe("a1 narration before tool use");
+  });
+
+  test("clamps long text from the start so the END is preserved", () => {
+    const cap = 200;
+    const marker = "[...truncated]";
+    const longText = "early prefix " + "x".repeat(2000) + " FINAL NEXT STEP";
+    const out = extractTailAssistantText([message("assistant", longText)], cap);
+    expect(out).not.toBeNull();
+    expect(out!.startsWith(marker)).toBe(true);
+    expect(out!.endsWith("FINAL NEXT STEP")).toBe(true);
+    // Kept slice is at most `cap` chars; the marker and one space are prepended.
+    expect(out!.length).toBeLessThanOrEqual(cap + `${marker} `.length);
+  });
+
+  test("ignores empty/whitespace-only assistant text", () => {
+    const transcript: Message[] = [
+      message("assistant", "real content"),
+      message("assistant", " \n "),
+    ];
+    const tail = extractTailAssistantText(transcript);
+    expect(tail).toBe("real content");
+  });
+
+  test("returns null for an empty messages array", () => {
+    expect(extractTailAssistantText([])).toBeNull();
+  });
+});
+
+describe("appendTailAnchorToSummary", () => { // unit tests: splice format and replace-not-stack behaviour
+ test("appends a tag-wrapped block after the summary", () => {
+ const out = appendTailAnchorToSummary(
+ "## Goals\n- item",
+ "Next step: file the SSE followup.",
+ );
+ expect(out).toContain("## Goals\n- item"); // the summary body is kept
+ expect(out).toContain(
+ "\nNext step: file the SSE followup.\n",
+ );
+ expect(out.endsWith("")).toBe(true); // NOTE(review): "" makes this trivially true — the close-tag literal looks stripped from this copy; restore it
+ });
+
+ test("is idempotent: re-applying with new text replaces the prior tail", () => {
+ const first = appendTailAnchorToSummary("body", "tail-1");
+ const second = appendTailAnchorToSummary(first, "tail-2");
+ expect(second).toContain("body");
+ expect(second).toContain("tail-2");
+ expect(second).not.toContain("tail-1");
+ // Exactly one open-tag occurrence — no stacking. NOTE(review): the regex below is empty in this copy (`//g` parses as a line comment, breaking the statement); the open-tag pattern looks stripped — restore it before running.
+ expect(second.match(//g)?.length).toBe(1);
+ });
+});
+
+describe("compaction tail-anchor", () => { // end-to-end: maybeCompact() output is inspected through messages[0] and summaryText
+ test("splices the last assistant text block verbatim into the summary message", async () => {
+ const provider = createProvider(() => ({
+ content: [{ type: "text", text: "## Goals\n- LLM summary" }],
+ model: "mock-model",
+ usage: { inputTokens: 100, outputTokens: 25 },
+ stopReason: "end_turn",
+ }));
+ const manager = new ContextWindowManager({
+ provider,
+ systemPrompt: "system prompt",
+ config: makeConfig({ maxInputTokens: 600 }),
+ });
+ const long = "x".repeat(240);
+ const distinctiveTail =
+ "Pushed 8fe70d63a0 — next step: file the SSE followup as promised.";
+ // `distinctiveTail` is the assistant reply to u1, so it should land at the
+ // end of the compactable region. The history mirrors the 6-message,
+ // 600-token shape the original author says an earlier test uses, where
+ // the keep boundary is expected to retain two turns (kept =
+ // [u2, a2, u3, a3]; compactable = [u1, distinctiveTail]). That models the
+ // drift scenario: the model's last narration in a long work span would
+ // otherwise be summarized away.
+ const history: Message[] = [
+ message("user", `u1 ${long}`),
+ message("assistant", distinctiveTail),
+ message("user", `u2 ${long}`),
+ message("assistant", `a2 ${long}`),
+ message("user", `u3 ${long}`),
+ message("assistant", `a3 ${long}`),
+ ];
+
+ const result = await manager.maybeCompact(history);
+
+ expect(result.compacted).toBe(true);
+ const summaryInner = getSummaryFromContextMessage(result.messages[0]);
+ expect(summaryInner).not.toBeNull();
+ // The LLM-produced summary text is still present.
+ expect(summaryInner).toContain("LLM summary");
+ // The anchor tags should wrap `distinctiveTail` verbatim. NOTE(review): both
+ // tag literals below are "" in this copy (markup looks stripped); restore.
+ expect(summaryInner).toContain("");
+ expect(summaryInner).toContain(distinctiveTail);
+ expect(summaryInner).toContain("");
+ // `summaryText` must carry the same anchored text that is stored in
+ // messages[0], for downstream consumers (DB, context_compacted event).
+ expect(result.summaryText).toContain(distinctiveTail);
+ });
+
+ test("omits the tail-anchor block when no assistant text exists in compactable region", async () => {
+ // The oldest turns hold an assistant message with ONLY a tool_use block
+ // (no text) plus user turns, so — if only those are compacted — there is
+ // no assistant text to anchor and the block should be omitted.
+ const provider = createProvider(() => ({
+ content: [{ type: "text", text: "## Goals\n- summary" }],
+ model: "mock-model",
+ usage: { inputTokens: 100, outputTokens: 25 },
+ stopReason: "end_turn",
+ }));
+ const manager = new ContextWindowManager({
+ provider,
+ systemPrompt: "system prompt",
+ config: makeConfig({ maxInputTokens: 600 }),
+ });
+ const long = "x".repeat(240);
+ const history: Message[] = [
+ message("user", `u1 ${long}`),
+ {
+ role: "assistant",
+ content: [
+ {
+ type: "tool_use",
+ id: "tool-1",
+ name: "bash",
+ input: { command: "ls" },
+ } as ContentBlock,
+ ],
+ },
+ {
+ role: "user",
+ content: [
+ {
+ type: "tool_result",
+ tool_use_id: "tool-1",
+ content: "ls output",
+ } as ContentBlock,
+ ],
+ },
+ message("user", `u2 ${long}`),
+ message("assistant", `a2 ${long}`),
+ message("user", `u3 ${long}`),
+ message("assistant", `a3 ${long}`),
+ ];
+
+ const result = await manager.maybeCompact(history);
+
+ expect(result.compacted).toBe(true);
+ const summaryInner = getSummaryFromContextMessage(result.messages[0]);
+ expect(summaryInner).not.toBeNull();
+ // NOTE(review): despite the test name, omission is never asserted — the
+ // only further expectation sits inside the `if` below, so a run with no
+ // anchor passes without checking anything. Consider asserting absence.
+ if (summaryInner!.includes("")) {
+ // Intended for the case where an anchor IS present, i.e. the keep
+ // boundary pulled a2 into the compactable region and its text was
+ // anchored; then only ordering is checked (anchor after the LLM summary
+ // text). NOTE(review): the tag literal is "" in this copy, so `includes`
+ // is always true and `indexOf("")` is 0 — restore the real tag.
+ expect(summaryInner!.indexOf("summary")).toBeLessThan(
+ summaryInner!.indexOf(""),
+ );
+ }
+ });
+
+ test("clamps tail-anchor when the last assistant text is longer than the cap", async () => {
+ const provider = createProvider(() => ({
+ content: [{ type: "text", text: "## Goals\n- summary" }],
+ model: "mock-model",
+ usage: { inputTokens: 100, outputTokens: 25 },
+ stopReason: "end_turn",
+ }));
+ const manager = new ContextWindowManager({
+ provider,
+ systemPrompt: "system prompt",
+ config: makeConfig({ maxInputTokens: 600 }),
+ });
+ const long = "x".repeat(240);
+ const tailEnd = "FINAL DISTINCTIVE END MARKER";
+ // Over 2000 chars, so longer than TAIL_ANCHOR_MAX_CHARS (=1500) and subject to clamping.
+ const longTail = "early body " + "y".repeat(2000) + " " + tailEnd;
+ const history: Message[] = [
+ message("user", `u1 ${long}`),
+ message("assistant", longTail),
+ message("user", `u2 ${long}`),
+ message("assistant", `a2 ${long}`),
+ message("user", `u3 ${long}`),
+ message("assistant", `a3 ${long}`),
+ ];
+
+ const result = await manager.maybeCompact(history);
+
+ expect(result.compacted).toBe(true);
+ const summaryInner = getSummaryFromContextMessage(result.messages[0]);
+ expect(summaryInner).not.toBeNull();
+ if (summaryInner!.includes("")) {
+ // When clamped, the END (most recent narration) survives. NOTE(review): with the real tag restored this `if` makes every check below optional — nothing is asserted if no anchor is emitted.
+ expect(summaryInner).toContain(tailEnd);
+ // The truncation marker is present and the early prefix is gone.
+ expect(summaryInner).toContain("[...truncated]");
+ expect(summaryInner).not.toContain("early body");
+ }
+ });
+});
diff --git a/assistant/src/context/window-manager.ts b/assistant/src/context/window-manager.ts
index 30bc8f97249..e5a19d99c39 100644
--- a/assistant/src/context/window-manager.ts
+++ b/assistant/src/context/window-manager.ts
@@ -32,6 +32,19 @@ const COMPACTION_TOOL_RESULT_MAX_CHARS = 6_000;
const MIN_COMPACTABLE_PERSISTED_MESSAGES = 2;
const INTERNAL_CONTEXT_SUMMARY_MESSAGES = new WeakSet();
+/**
+ * Hard cap, in characters, on the verbatim tail-anchor block spliced into
+ * the post-compaction summary message; used as the default `maxChars` of
+ * `extractTailAssistantText`. 1500 chars (~375 tokens) covers a few
+ * paragraphs of recent assistant narration without bloating the summary.
+ * A longer tail keeps its END (most recent text), since "next step" /
+ * "now I'll …" statements typically close the assistant's last text block
+ * and that is the part the post-compaction model needs most.
+ */
+const TAIL_ANCHOR_MAX_CHARS = 1500;
+const TAIL_ANCHOR_OPEN_TAG = ""; // NOTE(review): empty in this copy — the tag markup looks stripped; with "" the anchor is undelimited and never replaced. Restore the real tag.
+const TAIL_ANCHOR_CLOSE_TAG = ""; // NOTE(review): same as above; must be the closing counterpart of the open tag.
+
/**
* When the existing summary is this fraction or more of the per-summary
* token budget, inject a "compress older content aggressively" instruction
@@ -688,7 +701,6 @@ export class ContextWindowManager {
signal,
options?.overrideProfile ?? null,
);
- const summary = summaryUpdate.summary;
const summaryInputTokens = summaryUpdate.inputTokens;
const summaryOutputTokens = summaryUpdate.outputTokens;
const summaryModel = summaryUpdate.model;
@@ -704,6 +716,19 @@ export class ContextWindowManager {
}
const summaryCalls = 1;
+ // Force-keep the most recent assistant text from the compactable region
+ // by splicing it verbatim into the summary message. This is independent
+ // of what the LLM summarizer chose to surface — when compaction
+ // interrupts a long assistant work span, this anchor preserves the
+ // model's last self-narration ("Next step: …", "About to …") so the
+ // post-compaction model has unambiguous continuity instead of falling
+ // back to a "where am I?" recovery shape.
+ const tailAnchorText = extractTailAssistantText(compactableMessages);
+ const summary =
+ tailAnchorText != null
+ ? appendTailAnchorToSummary(summaryUpdate.summary, tailAnchorText)
+ : summaryUpdate.summary;
+
// Media (images, files) in kept turns is preserved naturally — those
// turns are carried forward as-is and their token cost is already
// accounted for by pickKeepBoundary's estimatePromptTokens call.
@@ -1286,6 +1311,63 @@ export function createContextSummaryMessage(summary: string): Message {
return message;
}
+/**
+ * Scan `messages` from newest to oldest and return the trimmed text of the
+ * first assistant message whose text is non-empty. Text is obtained with
+ * `extractText`; per the original contract, tool_use / tool_result / image
+ * / unknown blocks are skipped, so tool_use-only turns are passed over.
+ *
+ * Text longer than `maxChars` is clamped from the START: the final
+ * `maxChars` characters (cut with `safeStringSlice`) are kept and prefixed
+ * with "[...truncated] ", because "next step" / "now I'll …" narration
+ * tends to land at the END.
+ *
+ * @param messages Conversation slice to search (the compactable region).
+ * @param maxChars Clamp threshold; defaults to `TAIL_ANCHOR_MAX_CHARS`.
+ * @returns The anchor text, or `null` when no assistant text exists.
+ */
+export function extractTailAssistantText(
+  messages: Message[],
+  maxChars: number = TAIL_ANCHOR_MAX_CHARS,
+): string | null {
+  for (let idx = messages.length; idx-- > 0; ) {
+    const candidate = messages[idx];
+    const body =
+      candidate?.role === "assistant"
+        ? extractText(candidate.content).trim()
+        : "";
+    if (body === "") continue; // not an assistant turn, or no usable text
+    if (body.length <= maxChars) return body;
+    // Over the cap: drop the head so the newest narration survives.
+    const kept = safeStringSlice(body, body.length - maxChars, body.length);
+    return `[...truncated] ${kept}`;
+  }
+  return null;
+}
+
+/**
+ * Append a verbatim tail-anchor block to the LLM-produced summary text:
+ * right-trimmed summary, blank line, open tag, trimmed `tailText`, close
+ * tag (each on its own line). The tag-wrapped block is structurally
+ * distinct from any `## ` section the LLM might generate.
+ * Re-application replaces instead of stacking: everything from the LAST
+ * open-tag occurrence onward is dropped before appending, so a tail
+ * carried over from a prior compaction does not accumulate.
+ * NOTE(review): the cut is made at any open-tag occurrence, not only a
+ * well-formed trailing block, so text after a stray tag is discarded.
+ * @returns The summary ending in exactly one freshly written anchor block.
+ */
+export function appendTailAnchorToSummary(
+  summary: string,
+  tailText: string,
+): string {
+  let base = summary.trimEnd();
+  const priorAnchorAt = base.lastIndexOf(TAIL_ANCHOR_OPEN_TAG);
+  if (priorAnchorAt >= 0) base = base.slice(0, priorAnchorAt).trimEnd();
+  const anchorBlock = `${TAIL_ANCHOR_OPEN_TAG}\n${tailText.trim()}\n${TAIL_ANCHOR_CLOSE_TAG}`;
+  return `${base}\n\n${anchorBlock}`;
+}
+
/**
* Build content blocks for the summary prompt. Returns a mix of text blocks
* (for the scaffolding, existing summary, and serialized non-image content)