Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
244 changes: 244 additions & 0 deletions assistant/src/__tests__/context-window-manager.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ import { describe, expect, test } from "bun:test";
import type { ContextWindowConfig } from "../config/types.js";
import { estimateTextTokens } from "../context/token-estimator.js";
import {
appendTailAnchorToSummary,
clampSummaryAtSectionBoundary,
CONTEXT_SUMMARY_MARKER,
ContextWindowManager,
createContextSummaryMessage,
extractTailAssistantText,
getSummaryFromContextMessage,
stripCompactionOnlyInjections,
} from "../context/window-manager.js";
Expand Down Expand Up @@ -2091,3 +2093,245 @@ describe("clampSummaryAtSectionBoundary", () => {
expect(clamped.length).toBeLessThanOrEqual(100);
});
});

describe("extractTailAssistantText", () => {
test("returns the most recent assistant text block", () => {
const messages: Message[] = [
message("user", "u1"),
message("assistant", "a1 first"),
message("user", "u2"),
message("assistant", "a2 last"),
];
expect(extractTailAssistantText(messages)).toBe("a2 last");
});

test("returns null when no assistant text is present", () => {
const messages: Message[] = [
message("user", "u1"),
message("user", "u2"),
];
expect(extractTailAssistantText(messages)).toBeNull();
});

test("skips assistant messages with only tool_use blocks and finds the prior text", () => {
const messages: Message[] = [
message("assistant", "a1 narration before tool use"),
message("user", "u1"),
{
role: "assistant",
content: [
{
type: "tool_use",
id: "tool-1",
name: "bash",
input: { command: "ls" },
} as ContentBlock,
],
},
];
expect(extractTailAssistantText(messages)).toBe(
"a1 narration before tool use",
);
});

test("clamps long text from the start so the END is preserved", () => {
const longText = "early prefix " + "x".repeat(2000) + " FINAL NEXT STEP";
const messages: Message[] = [message("assistant", longText)];
const result = extractTailAssistantText(messages, 200);
expect(result).not.toBeNull();
expect(result!.startsWith("[...truncated]")).toBe(true);
expect(result!.endsWith("FINAL NEXT STEP")).toBe(true);
// Stripped block size ≈ maxChars; "[...truncated] " adds a fixed prefix.
expect(result!.length).toBeLessThanOrEqual(200 + "[...truncated] ".length);
});

test("ignores empty/whitespace-only assistant text", () => {
const messages: Message[] = [
message("assistant", "real content"),
message("assistant", " \n "),
];
expect(extractTailAssistantText(messages)).toBe("real content");
});

test("returns null for an empty messages array", () => {
expect(extractTailAssistantText([])).toBeNull();
});
});

describe("appendTailAnchorToSummary", () => {
test("appends a tag-wrapped block after the summary", () => {
const out = appendTailAnchorToSummary(
"## Goals\n- item",
"Next step: file the SSE followup.",
);
expect(out).toContain("## Goals\n- item");
expect(out).toContain(
"<verbatim_tail>\nNext step: file the SSE followup.\n</verbatim_tail>",
);
expect(out.endsWith("</verbatim_tail>")).toBe(true);
});

test("is idempotent: re-applying with new text replaces the prior tail", () => {
const first = appendTailAnchorToSummary("body", "tail-1");
const second = appendTailAnchorToSummary(first, "tail-2");
expect(second).toContain("body");
expect(second).toContain("tail-2");
expect(second).not.toContain("tail-1");
// Exactly one open-tag occurrence — no stacking.
expect(second.match(/<verbatim_tail>/g)?.length).toBe(1);
});
});

describe("compaction tail-anchor", () => {
test("splices the last assistant text block verbatim into the summary message", async () => {
const provider = createProvider(() => ({
content: [{ type: "text", text: "## Goals\n- LLM summary" }],
model: "mock-model",
usage: { inputTokens: 100, outputTokens: 25 },
stopReason: "end_turn",
}));
const manager = new ContextWindowManager({
provider,
systemPrompt: "system prompt",
config: makeConfig({ maxInputTokens: 600 }),
});
const long = "x".repeat(240);
const distinctiveTail =
"Pushed 8fe70d63a0 — next step: file the SSE followup as promised.";
// Place `distinctiveTail` as the assistant response for u1 so it lands
// at the end of the compactable region. With the same 600-token budget
// and 6-message shape as the existing 600-token compaction test above,
// the binary search settles on keepTurns=2 (kept = [u2, a2, u3, a3];
// compactable = [u1, distinctiveTail]) — exercising the real-world
// drift scenario where the model's last narration in a long work span
// gets summarized away.
const history: Message[] = [
message("user", `u1 ${long}`),
message("assistant", distinctiveTail),
message("user", `u2 ${long}`),
message("assistant", `a2 ${long}`),
message("user", `u3 ${long}`),
message("assistant", `a3 ${long}`),
];

const result = await manager.maybeCompact(history);

expect(result.compacted).toBe(true);
const summaryInner = getSummaryFromContextMessage(result.messages[0]);
expect(summaryInner).not.toBeNull();
// LLM summary still present.
expect(summaryInner).toContain("LLM summary");
// Verbatim tail spliced in: distinctive text from the LAST assistant
// message in the compactable region (here, `distinctiveTail`).
expect(summaryInner).toContain("<verbatim_tail>");
expect(summaryInner).toContain(distinctiveTail);
expect(summaryInner).toContain("</verbatim_tail>");
// summaryText reflects what's persisted in messages[0] for consistency
// with downstream consumers (DB, context_compacted event).
expect(result.summaryText).toContain(distinctiveTail);
});

test("omits the tail-anchor block when no assistant text exists in compactable region", async () => {
// Construct a scenario where the compactable region has assistant
// messages with ONLY tool_use blocks (no text) plus user turns. The
// anchor should be omitted gracefully.
const provider = createProvider(() => ({
content: [{ type: "text", text: "## Goals\n- summary" }],
model: "mock-model",
usage: { inputTokens: 100, outputTokens: 25 },
stopReason: "end_turn",
}));
const manager = new ContextWindowManager({
provider,
systemPrompt: "system prompt",
config: makeConfig({ maxInputTokens: 600 }),
});
const long = "x".repeat(240);
const history: Message[] = [
message("user", `u1 ${long}`),
{
role: "assistant",
content: [
{
type: "tool_use",
id: "tool-1",
name: "bash",
input: { command: "ls" },
} as ContentBlock,
],
},
{
role: "user",
content: [
{
type: "tool_result",
tool_use_id: "tool-1",
content: "ls output",
} as ContentBlock,
],
},
message("user", `u2 ${long}`),
message("assistant", `a2 ${long}`),
message("user", `u3 ${long}`),
message("assistant", `a3 ${long}`),
];

const result = await manager.maybeCompact(history);

expect(result.compacted).toBe(true);
const summaryInner = getSummaryFromContextMessage(result.messages[0]);
expect(summaryInner).not.toBeNull();
// No tail anchor when the only compactable assistant message has no text.
// (a2 / a3 are kept verbatim post-compaction since they're recent enough,
// so the compactable-region's only assistant message is the tool_use one.)
if (summaryInner!.includes("<verbatim_tail>")) {
// If a2 ended up in the compactable region after binary search, the
// anchor would surface a2's text — which is fine; the assertion that
// matters is that the spliced content (when present) is verbatim
// content from the compactable region, not noise. Validate the
// ordering: anchor must follow LLM summary text.
expect(summaryInner!.indexOf("summary")).toBeLessThan(
summaryInner!.indexOf("<verbatim_tail>"),
);
}
});

test("clamps tail-anchor when the last assistant text is longer than the cap", async () => {
const provider = createProvider(() => ({
content: [{ type: "text", text: "## Goals\n- summary" }],
model: "mock-model",
usage: { inputTokens: 100, outputTokens: 25 },
stopReason: "end_turn",
}));
const manager = new ContextWindowManager({
provider,
systemPrompt: "system prompt",
config: makeConfig({ maxInputTokens: 600 }),
});
const long = "x".repeat(240);
const tailEnd = "FINAL DISTINCTIVE END MARKER";
// Long enough to trip TAIL_ANCHOR_MAX_CHARS (=1500) clamping.
const longTail = "early body " + "y".repeat(2000) + " " + tailEnd;
const history: Message[] = [
message("user", `u1 ${long}`),
message("assistant", longTail),
message("user", `u2 ${long}`),
message("assistant", `a2 ${long}`),
message("user", `u3 ${long}`),
message("assistant", `a3 ${long}`),
];

const result = await manager.maybeCompact(history);

expect(result.compacted).toBe(true);
const summaryInner = getSummaryFromContextMessage(result.messages[0]);
expect(summaryInner).not.toBeNull();
if (summaryInner!.includes("<verbatim_tail>")) {
// When clamped, the END is preserved (most recent narration).
expect(summaryInner).toContain(tailEnd);
// And the early prefix is dropped.
expect(summaryInner).toContain("[...truncated]");
expect(summaryInner).not.toContain("early body");
}
});
});
84 changes: 83 additions & 1 deletion assistant/src/context/window-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,19 @@ const COMPACTION_TOOL_RESULT_MAX_CHARS = 6_000;
const MIN_COMPACTABLE_PERSISTED_MESSAGES = 2;
const INTERNAL_CONTEXT_SUMMARY_MESSAGES = new WeakSet<Message>();

/**
* Hard cap on the verbatim tail-anchor block we splice into the
* post-compaction summary message (see `extractTailAssistantText`). 1500
* chars (~375 tokens) covers a few paragraphs of recent assistant
* narration without bloating the summary. When the tail exceeds this
* size we keep the END (most recent text), since "next step" / "now I'll
* …" statements typically live at the end of the assistant's last text
* block and that's the part the post-compaction model needs most.
*/
const TAIL_ANCHOR_MAX_CHARS = 1500;
const TAIL_ANCHOR_OPEN_TAG = "<verbatim_tail>";
const TAIL_ANCHOR_CLOSE_TAG = "</verbatim_tail>";

/**
* When the existing summary is this fraction or more of the per-summary
* token budget, inject a "compress older content aggressively" instruction
Expand Down Expand Up @@ -688,7 +701,6 @@ export class ContextWindowManager {
signal,
options?.overrideProfile ?? null,
);
const summary = summaryUpdate.summary;
const summaryInputTokens = summaryUpdate.inputTokens;
const summaryOutputTokens = summaryUpdate.outputTokens;
const summaryModel = summaryUpdate.model;
Expand All @@ -704,6 +716,19 @@ export class ContextWindowManager {
}
const summaryCalls = 1;

// Force-keep the most recent assistant text from the compactable region
// by splicing it verbatim into the summary message. This is independent
// of what the LLM summarizer chose to surface — when compaction
// interrupts a long assistant work span, this anchor preserves the
// model's last self-narration ("Next step: …", "About to …") so the
// post-compaction model has unambiguous continuity instead of falling
// back to a "where am I?" recovery shape.
const tailAnchorText = extractTailAssistantText(compactableMessages);
const summary =
tailAnchorText != null
? appendTailAnchorToSummary(summaryUpdate.summary, tailAnchorText)
: summaryUpdate.summary;

// Media (images, files) in kept turns is preserved naturally — those
// turns are carried forward as-is and their token cost is already
// accounted for by pickKeepBoundary's estimatePromptTokens call.
Expand Down Expand Up @@ -1286,6 +1311,63 @@ export function createContextSummaryMessage(summary: string): Message {
return message;
}

/**
* Walk `messages` backward and return the concatenated text content of the
* most recent assistant message that contains at least one non-empty text
* block. tool_use / tool_result / image / unknown blocks are skipped. The
* result is trimmed and (if longer than `maxChars`) clamped from the START
* so the END — where "next step" / "now I'll …" narration tends to land —
* is preserved.
*
* Returns `null` when no eligible assistant text is found (e.g. compactable
* region was all user/tool messages, or all assistant messages were
* tool_use-only). The caller treats `null` as "no anchor to splice".
*
* Used by `_maybeCompact` to force-keep the last assistant text from the
* compactable region into the post-compaction summary message, so the
* model's most recent self-narration survives summarization regardless of
* whether the LLM summarizer chose to surface it.
*/
export function extractTailAssistantText(
messages: Message[],
maxChars: number = TAIL_ANCHOR_MAX_CHARS,
): string | null {
for (let i = messages.length - 1; i >= 0; i--) {
const message = messages[i];
if (message?.role !== "assistant") continue;
const text = extractText(message.content).trim();
if (text.length === 0) continue;
if (text.length <= maxChars) return text;
// Keep the END — most recent narration wins.
const truncated = safeStringSlice(text, text.length - maxChars, text.length);
return `[...truncated] ${truncated}`;
}
return null;
}

/**
* Splice a verbatim tail-anchor block onto the end of the LLM-produced
* summary text. The tag-wrapped block is structurally distinct from any
* `## ` section the LLM might generate, so it survives section-boundary
* clamping in `clampSummaryAtSectionBoundary` (which only runs on the LLM
* summary itself, before this splice).
*
* Idempotent: if the summary already ends with a `<verbatim_tail>…` block
* (e.g. from a prior compaction whose summary was carried forward as
* `existingSummary`), it is replaced rather than stacked, so successive
* compactions don't accumulate stale tails.
*/
export function appendTailAnchorToSummary(
summary: string,
tailText: string,
): string {
const trimmed = summary.trimEnd();
const existingOpen = trimmed.lastIndexOf(TAIL_ANCHOR_OPEN_TAG);
const base =
existingOpen >= 0 ? trimmed.slice(0, existingOpen).trimEnd() : trimmed;
return `${base}\n\n${TAIL_ANCHOR_OPEN_TAG}\n${tailText.trim()}\n${TAIL_ANCHOR_CLOSE_TAG}`;
}

/**
* Build content blocks for the summary prompt. Returns a mix of text blocks
* (for the scaffolding, existing summary, and serialized non-image content)
Expand Down
Loading