2 changes: 1 addition & 1 deletion package.json
@@ -4,7 +4,7 @@
"description": "AI-powered development tool",
"private": true,
"type": "module",
"packageManager": "[email protected].5",
"packageManager": "[email protected].6",
"scripts": {
"dev": "bun run --cwd packages/opencode --conditions=browser src/index.ts",
"typecheck": "bun turbo typecheck",
16 changes: 16 additions & 0 deletions packages/opencode/src/config/config.ts
@@ -1028,6 +1028,22 @@ export namespace Config {
.object({
auto: z.boolean().optional().describe("Enable automatic compaction when context is full (default: true)"),
prune: z.boolean().optional().describe("Enable pruning of old tool outputs (default: true)"),
threshold: z
.number()
.min(0.5)
.max(0.99)
.optional()
.describe(
"Percentage of context window to trigger compaction (default: 0.9). Value between 0.5 and 0.99.",
),
maxContext: z
.number()
.int()
.positive()
.optional()
.describe(
"Override the model's context limit to a lower value. This sets a user-defined cap on context usage, useful for cost control on large models. Example: If your model supports 2M tokens but you set maxContext to 100k, only 100k will be used. The actual limit will be min(model.limit.context, maxContext).",
),
})
.optional(),
experimental: z
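The two new fields control when automatic compaction triggers. A minimal sketch of how they combine, assuming the default threshold of 0.9 (the helper below is illustrative, not code from this PR):

```ts
// Illustrative only: mirrors the semantics described in the schema above.
interface CompactionConfig {
  auto?: boolean
  prune?: boolean
  threshold?: number // 0.5–0.99, defaults to 0.9
  maxContext?: number // optional user cap on the model's context limit
}

function compactionTriggerPoint(modelContextLimit: number, config: CompactionConfig): number {
  // Effective limit is the smaller of the model's limit and the user cap.
  const effectiveLimit = config.maxContext ? Math.min(config.maxContext, modelContextLimit) : modelContextLimit
  // Compaction fires once estimated usage exceeds this fraction of the limit.
  return effectiveLimit * (config.threshold ?? 0.9)
}

// A 2M-token model capped at 100k compacts once usage passes 90k tokens.
console.log(compactionTriggerPoint(2_000_000, { maxContext: 100_000 })) // 90000
```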
17 changes: 15 additions & 2 deletions packages/opencode/src/provider/transform.ts
@@ -632,9 +632,21 @@ export namespace ProviderTransform {
options: Record<string, any>,
modelLimit: number,
globalLimit: number,
contextWindow?: number,
estimatedInputTokens?: number,
): number {
const modelCap = modelLimit || globalLimit
const standardLimit = Math.min(modelCap, globalLimit)
let standardLimit = Math.min(modelCap, globalLimit)

// Dynamic max_tokens calculation based on input size and context window
if (contextWindow && estimatedInputTokens) {
const SAFETY_BUFFER = 4000 // Buffer to account for estimation errors
const availableTokens = contextWindow - estimatedInputTokens - SAFETY_BUFFER

if (availableTokens > 0) {
standardLimit = Math.min(standardLimit, availableTokens)
}
}

if (npm === "@ai-sdk/anthropic") {
const thinking = options?.["thinking"]
@@ -649,7 +661,8 @@
}
}

return standardLimit
// Ensure minimum of 1000 tokens
return Math.max(1000, standardLimit)
}

export function schema(model: Provider.Model, schema: JSONSchema.BaseSchema) {
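Stripped of the provider-specific Anthropic branch, the resulting max_tokens math looks roughly like this (a standalone sketch, not the exported function):

```ts
function dynamicMaxOutputTokens(
  modelLimit: number,
  globalLimit: number,
  contextWindow?: number,
  estimatedInputTokens?: number,
): number {
  const SAFETY_BUFFER = 4000 // same buffer as above, to absorb estimation error
  let limit = Math.min(modelLimit || globalLimit, globalLimit)
  if (contextWindow && estimatedInputTokens) {
    const available = contextWindow - estimatedInputTokens - SAFETY_BUFFER
    if (available > 0) limit = Math.min(limit, available)
  }
  return Math.max(1000, limit) // never request fewer than 1000 output tokens
}

// A 200k window with ~180k of estimated input leaves 16k for output,
// instead of the unconditional cap used before this change.
console.log(dynamicMaxOutputTokens(64_000, 32_000, 200_000, 180_000)) // 16000
```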
65 changes: 61 additions & 4 deletions packages/opencode/src/session/compaction.ts
@@ -14,6 +14,9 @@ import { fn } from "@/util/fn"
import { Agent } from "@/agent/agent"
import { Plugin } from "@/plugin"
import { Config } from "@/config/config"
import { LLM } from "./llm"
import { SystemPrompt } from "./system"
import type { ModelMessage } from "ai"

export namespace SessionCompaction {
const log = Log.create({ service: "session.compaction" })
@@ -30,12 +33,66 @@
export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
const config = await Config.get()
if (config.compaction?.auto === false) return false
const context = input.model.limit.context
if (context === 0) return false
const modelContextLimit = input.model.limit.context
if (modelContextLimit === 0) return false

// Use configured maxContext if provided, otherwise use model's context limit
const maxContext = config.compaction?.maxContext
const context = maxContext ? Math.min(maxContext, modelContextLimit) : modelContextLimit

// Use configured threshold (default: 0.9 = 90%)
const threshold = config.compaction?.threshold ?? 0.9

const count = input.tokens.input + input.tokens.cache.read + input.tokens.output
const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX
const usable = input.model.limit.input || context - output
return count > usable

// When maxContext is set, use it to calculate usable; otherwise use input limit if available
const usable = maxContext
? Math.min(input.model.limit.input || context, context) - output
: input.model.limit.input || context - output
return count > usable * threshold
}

/**
* Check if estimated tokens exceed threshold, used by pre-check and post-check.
*
* Context limit determination:
* 1. Get model's maximum context (from model.limit.input or model.limit.context)
* 2. If user set compaction.maxContext, use the smaller of the two
*
* Example:
* - Model supports: 2M tokens
* - User set maxContext: 100k tokens
* - Actual limit used: 100k tokens (user override)
*
* @returns needed=true if estimatedTokens > contextLimit * threshold
*/
export async function shouldCompact(input: {
model: Provider.Model
agent: Agent.Info
messages: ModelMessage[]
}): Promise<{ needed: boolean; estimatedTokens: number; contextLimit: number; threshold: number }> {
const config = await Config.get()
const compactionThreshold = config.compaction?.threshold ?? 0.9
const maxContext = config.compaction?.maxContext
const modelContextLimit = input.model.limit.input || input.model.limit.context

if (!modelContextLimit) {
return { needed: false, estimatedTokens: 0, contextLimit: 0, threshold: compactionThreshold }
}

// Use the smaller value: user's maxContext or model's limit
// This allows users to cap context usage on large models for cost control
const contextLimit = maxContext ? Math.min(maxContext, modelContextLimit) : modelContextLimit
const system = await SystemPrompt.build({ model: input.model, agent: input.agent })
const estimatedTokens = LLM.estimateInputTokens(input.messages, system)

return {
needed: estimatedTokens > contextLimit * compactionThreshold,
estimatedTokens,
contextLimit,
threshold: compactionThreshold,
}
}

export const PRUNE_MINIMUM = 20_000
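As a sanity check on the example in the doc comment, the decision shouldCompact makes reduces to the following (illustrative helper, not part of the module):

```ts
function needsCompaction(estimatedTokens: number, modelLimit: number, maxContext?: number, threshold = 0.9): boolean {
  const contextLimit = maxContext ? Math.min(maxContext, modelLimit) : modelLimit
  return estimatedTokens > contextLimit * threshold
}

console.log(needsCompaction(95_000, 2_000_000, 100_000)) // true: 95k > 100k * 0.9
console.log(needsCompaction(80_000, 2_000_000, 100_000)) // false: 80k <= 90k
```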
36 changes: 36 additions & 0 deletions packages/opencode/src/session/llm.ts
@@ -30,6 +30,36 @@ export namespace LLM {

export const OUTPUT_TOKEN_MAX = Flag.OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX || 32_000

// Estimate input tokens from messages and system prompt
// Uses standard tokenization estimate: ~4 characters per token for English text
export function estimateInputTokens(messages: ModelMessage[], systemPrompt: string[]): number {
let totalChars = 0

// Count system prompt
for (const sys of systemPrompt) {
totalChars += sys.length
}

// Count all messages
for (const msg of messages) {
if (typeof msg.content === "string") {
totalChars += msg.content.length
} else if (Array.isArray(msg.content)) {
for (const part of msg.content) {
if ("text" in part && typeof part.text === "string") {
totalChars += part.text.length
} else if ("image" in part) {
// Approximate image tokens (roughly 2000 tokens per image)
totalChars += 2000 * 4 // Convert to chars for consistent calculation
}
}
}
}

// Standard estimate: ~4 chars per token
return Math.ceil(totalChars / 4)
}

export type StreamInput = {
user: MessageV2.User
sessionID: string
@@ -131,13 +161,19 @@
},
)

// Estimate input tokens for dynamic max_tokens calculation
const estimatedInput = estimateInputTokens(input.messages, system)
const contextWindow = input.model.limit.input || input.model.limit.context

const maxOutputTokens = isCodex
? undefined
: ProviderTransform.maxOutputTokens(
input.model.api.npm,
params.options,
input.model.limit.output,
OUTPUT_TOKEN_MAX,
contextWindow,
estimatedInput,
)

const tools = await resolveTools(input)
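The heuristic is intentionally rough (~4 characters per token). A self-contained illustration, using simplified message shapes rather than the `ModelMessage` type from the `ai` package:

```ts
const system = ["You are a helpful assistant."]
const messages = [{ role: "user", content: "Summarize the design doc in three bullet points." }]

const totalChars =
  system.reduce((n, s) => n + s.length, 0) +
  messages.reduce((n, m) => n + (typeof m.content === "string" ? m.content.length : 0), 0)

// 76 characters in total, so the estimate is 76 / 4 = 19 tokens.
console.log(Math.ceil(totalChars / 4)) // 19
```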
50 changes: 50 additions & 0 deletions packages/opencode/src/session/processor.ts
@@ -20,6 +20,28 @@ export namespace SessionProcessor {
const DOOM_LOOP_THRESHOLD = 3
const log = Log.create({ service: "session.processor" })

// Detect context window overflow errors from various providers
function isContextWindowError(error: any): boolean {
const message = error?.message?.toLowerCase() || ""
const errorCode = error?.code?.toLowerCase() || ""

// Check common context window error patterns
const patterns = [
"context_length_exceeded",
"context window",
"context limit",
"maximum context length",
"token limit",
"too many tokens",
"request too large",
"prompt is too long",
"input is too long",
"exceeds the model's maximum",
]

return patterns.some((pattern) => message.includes(pattern) || errorCode.includes(pattern))
}

export type Info = Awaited<ReturnType<typeof create>>
export type Result = Awaited<ReturnType<Info["process"]>>

@@ -189,6 +211,26 @@
})

delete toolcalls[value.toolCallId]

// Check if tool result might cause context overflow
const msgs = await Session.messages({ sessionID: input.sessionID })
const modelMessages = MessageV2.toModelMessage(msgs.map((m) => ({ info: m.info, parts: m.parts })))
const agent = await Agent.get(input.assistantMessage.agent)
const check = await SessionCompaction.shouldCompact({
model: input.model,
agent,
messages: modelMessages,
})

if (check.needed) {
log.info("context overflow after tool execution", {
tool: match.tool,
estimatedTokens: check.estimatedTokens,
contextLimit: check.contextLimit,
threshold: check.threshold,
})
needsCompaction = true
}
}
break
}
@@ -341,6 +383,14 @@
error: e,
stack: JSON.stringify(e.stack),
})

// Check for context window overflow errors and trigger compaction
if (isContextWindowError(e)) {
log.info("context window overflow detected, triggering compaction")
needsCompaction = true
break
}

const error = MessageV2.fromError(e, { providerID: input.model.providerID })
const retry = SessionRetry.retryable(error)
if (retry !== undefined) {
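A quick, self-contained illustration of the pattern matching in isContextWindowError, applied to a made-up provider error (the message text is an assumption, not a captured log):

```ts
const patterns = ["context_length_exceeded", "maximum context length", "prompt is too long"]
const sampleError = { message: "This model's maximum context length is 200000 tokens.", code: "" }

const matched = patterns.some(
  (p) => sampleError.message.toLowerCase().includes(p) || sampleError.code.toLowerCase().includes(p),
)
console.log(matched) // true — such an error sets needsCompaction instead of going through retry
```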
23 changes: 22 additions & 1 deletion packages/opencode/src/session/prompt.ts
@@ -33,6 +33,7 @@ import { spawn } from "child_process"
import { Command } from "../command"
import { $, fileURLToPath } from "bun"
import { ConfigMarkdown } from "../config/markdown"
import { Config } from "../config/config"
import { SessionSummary } from "./summary"
import { NamedError } from "@opencode-ai/util/error"
import { fn } from "@/util/fn"
@@ -507,8 +508,27 @@
continue
}

// normal processing
// Pre-check: estimate input tokens before API call to prevent overflow errors
const agent = await Agent.get(lastUser.agent)
const modelMessages = MessageV2.toModelMessage(msgs)
const check = await SessionCompaction.shouldCompact({ model, agent, messages: modelMessages })

if (check.needed) {
log.info("pre-check overflow", {
estimatedTokens: check.estimatedTokens,
contextLimit: check.contextLimit,
threshold: check.threshold,
})
await SessionCompaction.create({
sessionID,
agent: lastUser.agent,
model: lastUser.model,
auto: true,
})
continue
}

// normal processing
const maxSteps = agent.steps ?? Infinity
const isLastStep = step >= maxSteps
msgs = await insertReminders({
@@ -688,6 +708,7 @@
for (const item of await ToolRegistry.tools(
{ modelID: input.model.api.id, providerID: input.model.providerID },
input.agent,
input.model,
)) {
const schema = ProviderTransform.schema(input.model, z.toJSONSchema(item.parameters))
tools[item.id] = tool({
15 changes: 15 additions & 0 deletions packages/opencode/src/session/system.ts
@@ -17,6 +17,7 @@ import PROMPT_CODEX from "./prompt/codex.txt"
import PROMPT_CODEX_INSTRUCTIONS from "./prompt/codex_header.txt"
import type { Provider } from "@/provider/provider"
import { Flag } from "@/flag/flag"
import type { Agent } from "@/agent/agent"

export namespace SystemPrompt {
export function header(providerID: string) {
@@ -37,6 +38,20 @@
return [PROMPT_ANTHROPIC_WITHOUT_TODO]
}

export async function build(input: { model: Provider.Model; agent: Agent.Info }): Promise<string[]> {
const system = header(input.model.providerID)
system.push(
[
...(input.agent.prompt ? [input.agent.prompt] : provider(input.model)),
...(await environment()),
...(await custom()),
]
.filter((x) => x)
.join("\n"),
)
return system
}

export async function environment() {
const project = Instance.project
return [