70 changes: 69 additions & 1 deletion packages/opencode/src/session/compaction.ts
@@ -43,6 +43,74 @@ export namespace SessionCompaction {

const PRUNE_PROTECTED_TOOLS = ["skill"]

// Reserve tokens for compaction prompt
const COMPACTION_PROMPT_RESERVE = 2000

function estimateMessageTokens(msg: MessageV2.WithParts): number {
let chars = 0
for (const part of msg.parts) {
if (part.type === "text") chars += part.text.length
if (part.type === "reasoning") chars += part.text.length
if (part.type === "tool" && part.state.status === "completed") {
// skip output that an earlier compaction already pruned (time.compacted is set)
if (!part.state.time.compacted) chars += part.state.output.length
chars += JSON.stringify(part.state.input).length
}
if (part.type === "tool" && part.state.status === "error") {
chars += part.state.error.length
}
}
// Token.estimate only needs the length of its input (≈4 chars per token, per its unit test),
// so a placeholder string of the same length stands in for the actual content
return Token.estimate(String.fromCharCode(0).repeat(chars))
}
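
// illustration (not part of this change): Token.estimate works out to roughly
// 4 characters per token (see the util.token.estimate test below), so a message
// whose parts total 8_000 characters is estimated at about 2_000 tokens, e.g.
//
//   estimateMessageTokens(messageWith8kCharsOfText) // ≈ 2_000 (hypothetical message)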

// truncates messages to fit within the context window for compaction.
// prioritizes recent messages while always preserving summary messages.
export function truncateForCompaction(
messages: MessageV2.WithParts[],
model: Provider.Model
): MessageV2.WithParts[] {
// budget: reserve room for output (falling back to OUTPUT_TOKEN_MAX when the model
// reports no output limit), prefer the model's explicit input limit, otherwise derive
// one from the context window, then leave room for the compaction prompt itself
const outputReserve = Math.min(model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX
const inputLimit = model.limit.input || model.limit.context - outputReserve
const targetLimit = inputLimit - COMPACTION_PROMPT_RESERVE

if (targetLimit <= 0) return messages

// collect summary messages first
const summaryMessages: MessageV2.WithParts[] = []
let summaryTokens = 0
for (const msg of messages) {
if (msg.info.role === "assistant" && (msg.info as MessageV2.Assistant).summary) {
summaryMessages.push(msg)
summaryTokens += estimateMessageTokens(msg)
}
}

if (summaryTokens >= targetLimit) return summaryMessages

// add messages from end until limit reached
const result: MessageV2.WithParts[] = []
let estimatedTokens = summaryTokens
const summaryIds = new Set(summaryMessages.map((m) => m.info.id))

for (let i = messages.length - 1; i >= 0; i--) {
const msg = messages[i]
if (summaryIds.has(msg.info.id)) continue

const msgTokens = estimateMessageTokens(msg)
if (estimatedTokens + msgTokens > targetLimit) {
log.info("truncateForCompaction", { included: result.length, skipped: i + 1 })
break
}

result.unshift(msg)
estimatedTokens += msgTokens
}

// restore chronological order; message ids are assumed to sort lexicographically
// by creation time, so a plain string comparison is enough
const finalResult = [...summaryMessages, ...result]
finalResult.sort((a, b) => (a.info.id > b.info.id ? 1 : -1))

return finalResult
}
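
// worked example (illustrative, mirrors the unit tests below): for a model with a
// 10_000-token context window and a 2_000-token output limit, assuming
// OUTPUT_TOKEN_MAX >= 2_000 and no explicit input limit, the budget works out to
//
//   outputReserve = 2_000
//   inputLimit    = 10_000 - 2_000 = 8_000
//   targetLimit   = 8_000 - COMPACTION_PROMPT_RESERVE = 6_000
//
// so roughly three 2_000-token messages fit and anything older is dropped
// (summaries excepted).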

// goes backwards through parts until there are 40_000 tokens worth of tool
// calls, then erases the output of earlier tool calls. the idea is to throw away
// old tool output that is no longer relevant.
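
// minimal sketch of that idea (hypothetical part/field shapes; the actual
// implementation lives in the collapsed lines below):
//
//   let recent = 0
//   for (const part of parts.toReversed()) {
//     if (part.type !== "tool" || part.state.status !== "completed") continue
//     // protected tools (PRUNE_PROTECTED_TOOLS, e.g. "skill") would be skipped here
//     const size = Token.estimate(part.state.output)
//     if (recent < 40_000) { recent += size; continue }
//     part.state.time.compacted = Date.now() // assumed marker; estimateMessageTokens skips compacted output
//   }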
@@ -149,7 +217,7 @@ export namespace SessionCompaction {
tools: {},
system: [],
messages: [
...MessageV2.toModelMessages(input.messages, model),
...MessageV2.toModelMessages(truncateForCompaction(input.messages, model), model),
{
role: "user",
content: [
155 changes: 155 additions & 0 deletions packages/opencode/test/session/compaction.test.ts
@@ -146,6 +146,161 @@ describe("session.compaction.isOverflow", () => {
})
})

describe("session.compaction.truncateForCompaction", () => {
function createMessage(opts: {
id: string
role: "user" | "assistant"
textLength: number
isSummary?: boolean
}): import("../../src/session/message-v2").MessageV2.WithParts {
const baseInfo = {
id: opts.id,
sessionID: "test-session",
}

const parts: import("../../src/session/message-v2").MessageV2.Part[] = [
{
id: `part-${opts.id}`,
sessionID: "test-session",
messageID: opts.id,
type: "text" as const,
text: "x".repeat(opts.textLength),
},
]

if (opts.role === "user") {
return {
info: {
...baseInfo,
role: "user" as const,
time: { created: Date.now() },
agent: "test-agent",
model: { providerID: "test", modelID: "test-model" },
},
parts,
}
}

return {
info: {
...baseInfo,
role: "assistant" as const,
time: { created: Date.now() },
parentID: "parent",
modelID: "test-model",
providerID: "test",
mode: "test",
agent: "test-agent",
path: { cwd: "/", root: "/" },
cost: 0,
tokens: { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 } },
summary: opts.isSummary ?? false,
},
parts,
}
}

test("returns all messages when within limit", async () => {
await using tmp = await tmpdir()
await Instance.provide({
directory: tmp.path,
fn: async () => {
const model = createModel({ context: 100_000, output: 8_000 })
const messages = [
createMessage({ id: "1", role: "user", textLength: 1000 }),
createMessage({ id: "2", role: "assistant", textLength: 1000 }),
]

const result = SessionCompaction.truncateForCompaction(messages, model)
expect(result.length).toBe(2)
},
})
})

test("truncates messages when exceeding limit", async () => {
await using tmp = await tmpdir()
await Instance.provide({
directory: tmp.path,
fn: async () => {
// Model with 10k context, 2k output = 8k input, minus 2k reserve = 6k usable
// Each message with 8000 chars = ~2000 tokens
const model = createModel({ context: 10_000, output: 2_000 })
const messages = [
createMessage({ id: "1", role: "user", textLength: 8000 }),
createMessage({ id: "2", role: "assistant", textLength: 8000 }),
createMessage({ id: "3", role: "user", textLength: 8000 }),
createMessage({ id: "4", role: "assistant", textLength: 8000 }),
]

const result = SessionCompaction.truncateForCompaction(messages, model)
expect(result.length).toBeLessThan(messages.length)
},
})
})

test("preserves summary messages even when truncating", async () => {
await using tmp = await tmpdir()
await Instance.provide({
directory: tmp.path,
fn: async () => {
const model = createModel({ context: 10_000, output: 2_000 })
const messages = [
createMessage({ id: "1", role: "assistant", textLength: 1000, isSummary: true }),
createMessage({ id: "2", role: "user", textLength: 8000 }),
createMessage({ id: "3", role: "assistant", textLength: 8000 }),
createMessage({ id: "4", role: "user", textLength: 8000 }),
]

const result = SessionCompaction.truncateForCompaction(messages, model)
const hasSummary = result.some(
(m) => m.info.role === "assistant" && (m.info as any).summary === true
)
expect(hasSummary).toBe(true)
},
})
})

test("prioritizes recent messages over older ones", async () => {
await using tmp = await tmpdir()
await Instance.provide({
directory: tmp.path,
fn: async () => {
const model = createModel({ context: 10_000, output: 2_000 })
const messages = [
createMessage({ id: "1", role: "user", textLength: 8000 }),
createMessage({ id: "2", role: "assistant", textLength: 8000 }),
createMessage({ id: "3", role: "user", textLength: 4000 }),
createMessage({ id: "4", role: "assistant", textLength: 4000 }),
]

const result = SessionCompaction.truncateForCompaction(messages, model)
const hasMessage4 = result.some((m) => m.info.id === "4")
expect(hasMessage4).toBe(true)
},
})
})

test("maintains chronological order after truncation", async () => {
await using tmp = await tmpdir()
await Instance.provide({
directory: tmp.path,
fn: async () => {
const model = createModel({ context: 10_000, output: 2_000 })
const messages = [
createMessage({ id: "1", role: "user", textLength: 4000 }),
createMessage({ id: "2", role: "assistant", textLength: 4000 }),
createMessage({ id: "3", role: "user", textLength: 4000 }),
]

const result = SessionCompaction.truncateForCompaction(messages, model)
for (let i = 1; i < result.length; i++) {
expect(result[i].info.id > result[i - 1].info.id).toBe(true)
}
},
})
})
})

describe("util.token.estimate", () => {
test("estimates tokens from text (4 chars per token)", () => {
const text = "x".repeat(4000)