diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts
index 79884d641ea..02337266a8e 100644
--- a/packages/opencode/src/session/compaction.ts
+++ b/packages/opencode/src/session/compaction.ts
@@ -27,8 +27,6 @@ export namespace SessionCompaction {
     ),
   }
 
-  const COMPACTION_BUFFER = 20_000
-
   export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
     const config = await Config.get()
     if (config.compaction?.auto === false) return false
@@ -39,11 +37,12 @@ export namespace SessionCompaction {
       input.tokens.total ||
       input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
 
-    const reserved =
-      config.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))
-    const usable = input.model.limit.input
-      ? input.model.limit.input - reserved
-      : context - ProviderTransform.maxOutputTokens(input.model)
+    // Reserve headroom so compaction triggers before the next turn overflows.
+    // maxOutputTokens() is capped at 32K (OUTPUT_TOKEN_MAX) regardless of the
+    // model's raw output limit, so this is never excessively aggressive.
+    // Users can override via config.compaction.reserved if needed (#12924).
+    const reserved = config.compaction?.reserved ?? ProviderTransform.maxOutputTokens(input.model)
+    const usable = input.model.limit.input ? input.model.limit.input - reserved : context - reserved
     return count >= usable
   }
 
diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts
index 5b4e7bdbc04..f6efee8d42a 100644
--- a/packages/opencode/src/session/message-v2.ts
+++ b/packages/opencode/src/session/message-v2.ts
@@ -603,7 +603,6 @@ export namespace MessageV2 {
     }
 
     if (msg.info.role === "assistant") {
-      const differentModel = `${model.providerID}/${model.id}` !== `${msg.info.providerID}/${msg.info.modelID}`
       const media: Array<{ mime: string; url: string }> = []
 
       if (
@@ -625,7 +624,7 @@ export namespace MessageV2 {
           assistantMessage.parts.push({
             type: "text",
             text: part.text,
-            ...(differentModel ? {} : { providerMetadata: part.metadata }),
+            providerMetadata: part.metadata,
           })
         if (part.type === "step-start")
           assistantMessage.parts.push({
@@ -660,7 +659,7 @@ export namespace MessageV2 {
               toolCallId: part.callID,
               input: part.state.input,
               output,
-              ...(differentModel ? {} : { callProviderMetadata: part.metadata }),
+              callProviderMetadata: part.metadata,
             })
           }
           if (part.state.status === "error")
@@ -670,7 +669,7 @@ export namespace MessageV2 {
               toolCallId: part.callID,
               input: part.state.input,
               errorText: part.state.error,
-              ...(differentModel ? {} : { callProviderMetadata: part.metadata }),
+              callProviderMetadata: part.metadata,
             })
           // Handle pending/running tool calls to prevent dangling tool_use blocks
           // Anthropic/Claude APIs require every tool_use to have a corresponding tool_result
@@ -681,14 +680,14 @@ export namespace MessageV2 {
               toolCallId: part.callID,
               input: part.state.input,
               errorText: "[Tool execution was interrupted]",
-              ...(differentModel ? {} : { callProviderMetadata: part.metadata }),
+              callProviderMetadata: part.metadata,
             })
           }
         if (part.type === "reasoning") {
           assistantMessage.parts.push({
             type: "reasoning",
             text: part.text,
-            ...(differentModel ? {} : { providerMetadata: part.metadata }),
+            providerMetadata: part.metadata,
           })
         }
       }
diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts
index 452926d12e1..90fa5c49705 100644
--- a/packages/opencode/test/session/compaction.test.ts
+++ b/packages/opencode/test/session/compaction.test.ts
@@ -113,19 +113,19 @@ describe("session.compaction.isOverflow", () => {
     })
   })
 
-  // ─── Bug reproduction tests ───────────────────────────────────────────
-  // These tests demonstrate that when limit.input is set, isOverflow()
-  // does not subtract any headroom for the next model response. This means
-  // compaction only triggers AFTER we've already consumed the full input
-  // budget, leaving zero room for the next API call's output tokens.
+  // ─── Headroom reservation tests ──────────────────────────────────────
+  // These tests verify that when limit.input is set, isOverflow()
+  // correctly reserves headroom (maxOutputTokens, capped at 32K) so
+  // compaction triggers before the next API call overflows.
   //
-  // Compare: without limit.input, usable = context - output (reserves space).
-  // With limit.input, usable = limit.input (reserves nothing).
+  // Previously (bug), the limit.input path only subtracted a 20K buffer
+  // while the non-input path subtracted the full maxOutputTokens — an
+  // asymmetry that let sessions grow ~12K tokens too large before compacting.
   //
   // Related issues: #10634, #8089, #11086, #12621
   // Open PRs: #6875, #12924
-  test("BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not", async () => {
+  test("headroom reserved when limit.input is set — compaction triggers near boundary", async () => {
     await using tmp = await tmpdir()
     await Instance.provide({
       directory: tmp.path,
@@ -151,7 +151,7 @@ describe("session.compaction.isOverflow", () => {
     })
   })
 
-  test("BUG: without limit.input, same token count correctly triggers compaction", async () => {
+  test("without limit.input, same token count correctly triggers compaction", async () => {
     await using tmp = await tmpdir()
     await Instance.provide({
       directory: tmp.path,
@@ -171,7 +171,7 @@ describe("session.compaction.isOverflow", () => {
     })
   })
 
-  test("BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it", async () => {
+  test("asymmetry — limit.input model does not allow more usage than equivalent model without it", async () => {
     await using tmp = await tmpdir()
     await Instance.provide({
       directory: tmp.path,
@@ -180,7 +180,7 @@ describe("session.compaction.isOverflow", () => {
     const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 })
     const withoutInputLimit = createModel({ context: 200_000, output: 32_000 })
 
-    // 170K total tokens — well above context-output (168K) but below input limit (200K)
+    // 181K total tokens — above usable (context - maxOutput = 168K)
     const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }
 
     const withLimit = await SessionCompaction.isOverflow({ tokens, model: withInputLimit })
diff --git a/packages/opencode/test/session/message-v2.test.ts b/packages/opencode/test/session/message-v2.test.ts
index 184bcd3efad..b19976ef9fc 100644
--- a/packages/opencode/test/session/message-v2.test.ts
+++ b/packages/opencode/test/session/message-v2.test.ts
@@ -53,6 +53,17 @@ const model: Provider.Model = {
   release_date: "2026-01-01",
 }
 
+const model2: Provider.Model = {
+  ...model,
+  id: "other-model",
+  providerID: "other",
+  api: {
+    ...model.api,
+    id: "other-model",
+  },
+  name: "Other Model",
+}
+
 function userInfo(id: string): MessageV2.User {
   return {
     id,
@@ -355,7 +366,90 @@ describe("session.message-v2.toModelMessage", () => {
     ])
   })
 
-  test("omits provider metadata when assistant model differs", () => {
+  test("preserves reasoning providerMetadata when model matches", () => {
+    const assistantID = "m-assistant"
+
+    const input: MessageV2.WithParts[] = [
+      {
+        info: assistantInfo(assistantID, "m-parent"),
+        parts: [
+          {
+            ...basePart(assistantID, "a1"),
+            type: "reasoning",
+            text: "thinking",
+            metadata: { openai: { signature: "sig-match" } },
+            time: { start: 0 },
+          },
+        ] as MessageV2.Part[],
+      },
+    ]
+
+    expect(MessageV2.toModelMessages(input, model)).toStrictEqual([
+      {
+        role: "assistant",
+        content: [{ type: "reasoning", text: "thinking", providerOptions: { openai: { signature: "sig-match" } } }],
+      },
+    ])
+  })
+
+  test("preserves reasoning providerMetadata when model differs", () => {
+    const assistantID = "m-assistant"
+
+    const input: MessageV2.WithParts[] = [
+      {
+        info: assistantInfo(assistantID, "m-parent", undefined, {
+          providerID: model2.providerID,
+          modelID: model2.api.id,
+        }),
+        parts: [
+          {
+            ...basePart(assistantID, "a1"),
+            type: "reasoning",
+            text: "thinking",
+            metadata: { openai: { signature: "sig-different" } },
+            time: { start: 0 },
+          },
+        ] as MessageV2.Part[],
+      },
+    ]
+
+    expect(MessageV2.toModelMessages(input, model)).toStrictEqual([
+      {
+        role: "assistant",
+        content: [{ type: "reasoning", text: "thinking", providerOptions: { openai: { signature: "sig-different" } } }],
+      },
+    ])
+  })
+
+  test("preserves text providerMetadata when model differs", () => {
+    const assistantID = "m-assistant"
+
+    const input: MessageV2.WithParts[] = [
+      {
+        info: assistantInfo(assistantID, "m-parent", undefined, {
+          providerID: model2.providerID,
+          modelID: model2.api.id,
+        }),
+        parts: [
+          {
+            ...basePart(assistantID, "a1"),
+            type: "text",
+            text: "done",
+            metadata: { openai: { assistant: "meta" } },
+          },
+        ] as MessageV2.Part[],
+      },
+    ]
+
+    expect(MessageV2.toModelMessages(input, model)).toStrictEqual([
+      {
+        role: "assistant",
+        content: [{ type: "text", text: "done", providerOptions: { openai: { assistant: "meta" } } }],
+      },
+    ])
+  })
+
+  test("preserves tool callProviderMetadata when model differs", () => {
     const userID = "m-user"
     const assistantID = "m-assistant"
 
@@ -371,16 +465,97 @@ describe("session.message-v2.toModelMessage", () => {
         ] as MessageV2.Part[],
       },
       {
-        info: assistantInfo(assistantID, userID, undefined, { providerID: "other", modelID: "other" }),
+        info: assistantInfo(assistantID, userID, undefined, {
+          providerID: model2.providerID,
+          modelID: model2.api.id,
+        }),
+        parts: [
+          {
+            ...basePart(assistantID, "a1"),
+            type: "tool",
+            callID: "call-1",
+            tool: "bash",
+            state: {
+              status: "completed",
+              input: { cmd: "ls" },
+              output: "ok",
+              title: "Bash",
+              metadata: {},
+              time: { start: 0, end: 1 },
+            },
+            metadata: { openai: { tool: "meta" } },
+          },
+        ] as MessageV2.Part[],
+      },
+    ]
+
+    expect(MessageV2.toModelMessages(input, model)).toStrictEqual([
+      {
+        role: "user",
+        content: [{ type: "text", text: "run tool" }],
+      },
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "tool-call",
+            toolCallId: "call-1",
+            toolName: "bash",
+            input: { cmd: "ls" },
+            providerExecuted: undefined,
+            providerOptions: { openai: { tool: "meta" } },
+          },
+        ],
+      },
+      {
+        role: "tool",
+        content: [
+          {
+            type: "tool-result",
+            toolCallId: "call-1",
+            toolName: "bash",
+            output: { type: "text", value: "ok" },
+            providerOptions: { openai: { tool: "meta" } },
+          },
+        ],
+      },
+    ])
+  })
+
+  test("handles undefined metadata gracefully", () => {
+    const userID = "m-user"
+    const assistantID = "m-assistant"
+
+    const input: MessageV2.WithParts[] = [
+      {
+        info: userInfo(userID),
+        parts: [
+          {
+            ...basePart(userID, "u1"),
+            type: "text",
+            text: "run tool",
+          },
+        ] as MessageV2.Part[],
+      },
+      {
+        info: assistantInfo(assistantID, userID, undefined, {
+          providerID: model2.providerID,
+          modelID: model2.api.id,
+        }),
         parts: [
           {
             ...basePart(assistantID, "a1"),
             type: "text",
             text: "done",
-            metadata: { openai: { assistant: "meta" } },
           },
           {
             ...basePart(assistantID, "a2"),
+            type: "reasoning",
+            text: "thinking",
+            time: { start: 0 },
+          },
+          {
+            ...basePart(assistantID, "a3"),
             type: "tool",
             callID: "call-1",
             tool: "bash",
@@ -392,7 +567,6 @@ describe("session.message-v2.toModelMessage", () => {
               metadata: {},
               time: { start: 0, end: 1 },
             },
-            metadata: { openai: { tool: "meta" } },
           },
         ] as MessageV2.Part[],
       },
@@ -407,6 +581,7 @@ describe("session.message-v2.toModelMessage", () => {
         role: "assistant",
         content: [
           { type: "text", text: "done" },
+          { type: "reasoning", text: "thinking", providerOptions: undefined },
          {
            type: "tool-call",
            toolCallId: "call-1",