2 changes: 1 addition & 1 deletion package.json
@@ -4,7 +4,7 @@
"description": "AI-powered development tool",
"private": true,
"type": "module",
"packageManager": "[email protected].5",
"packageManager": "[email protected].6",
"scripts": {
"dev": "bun run --cwd packages/opencode --conditions=browser src/index.ts",
"typecheck": "bun turbo typecheck",
16 changes: 16 additions & 0 deletions packages/opencode/src/config/config.ts
@@ -1028,6 +1028,22 @@ export namespace Config {
.object({
auto: z.boolean().optional().describe("Enable automatic compaction when context is full (default: true)"),
prune: z.boolean().optional().describe("Enable pruning of old tool outputs (default: true)"),
threshold: z
.number()
.min(0.5)
.max(0.99)
.optional()
.describe(
"Percentage of context window to trigger compaction (default: 0.9). Value between 0.5 and 0.99.",
),
maxContext: z
.number()
.int()
.positive()
.optional()
.describe(
"Override the model's context limit to a lower value. This sets a user-defined cap on context usage, useful for cost control on large models. Example: If your model supports 2M tokens but you set maxContext to 100k, only 100k will be used. The actual limit will be min(model.limit.context, maxContext).",
),
})
.optional(),
experimental: z
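The two new fields control when automatic compaction triggers. A minimal sketch of how they combine, assuming the default threshold of 0.9 (the helper below is illustrative, not code from this PR):

```ts
// Illustrative only: mirrors the semantics described in the schema above.
interface CompactionConfig {
  auto?: boolean
  prune?: boolean
  threshold?: number // 0.5–0.99, defaults to 0.9
  maxContext?: number // optional user cap on the model's context limit
}

function compactionTriggerPoint(modelContextLimit: number, config: CompactionConfig): number {
  // Effective limit is the smaller of the model's limit and the user cap.
  const effectiveLimit = config.maxContext ? Math.min(config.maxContext, modelContextLimit) : modelContextLimit
  // Compaction fires once estimated usage exceeds this fraction of the limit.
  return effectiveLimit * (config.threshold ?? 0.9)
}

// A 2M-token model capped at 100k compacts once usage passes 90k tokens.
console.log(compactionTriggerPoint(2_000_000, { maxContext: 100_000 })) // 90000
```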
17 changes: 15 additions & 2 deletions packages/opencode/src/provider/transform.ts
@@ -632,9 +632,21 @@ export namespace ProviderTransform {
options: Record<string, any>,
modelLimit: number,
globalLimit: number,
contextWindow?: number,
estimatedInputTokens?: number,
): number {
const modelCap = modelLimit || globalLimit
const standardLimit = Math.min(modelCap, globalLimit)
let standardLimit = Math.min(modelCap, globalLimit)

// Dynamic max_tokens calculation based on input size and context window
if (contextWindow && estimatedInputTokens) {
const SAFETY_BUFFER = 4000 // Buffer to account for estimation errors
const availableTokens = contextWindow - estimatedInputTokens - SAFETY_BUFFER

if (availableTokens > 0) {
standardLimit = Math.min(standardLimit, availableTokens)
}
}

if (npm === "@ai-sdk/anthropic") {
const thinking = options?.["thinking"]
@@ -649,7 +661,8 @@
}
}

return standardLimit
// Ensure minimum of 1000 tokens
return Math.max(1000, standardLimit)
}

export function schema(model: Provider.Model, schema: JSONSchema.BaseSchema) {
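Stripped of the provider-specific Anthropic branch, the resulting max_tokens math looks roughly like this (a standalone sketch, not the exported function):

```ts
function dynamicMaxOutputTokens(
  modelLimit: number,
  globalLimit: number,
  contextWindow?: number,
  estimatedInputTokens?: number,
): number {
  const SAFETY_BUFFER = 4000 // same buffer as above, to absorb estimation error
  let limit = Math.min(modelLimit || globalLimit, globalLimit)
  if (contextWindow && estimatedInputTokens) {
    const available = contextWindow - estimatedInputTokens - SAFETY_BUFFER
    if (available > 0) limit = Math.min(limit, available)
  }
  return Math.max(1000, limit) // never request fewer than 1000 output tokens
}

// A 200k window with ~180k of estimated input leaves 16k for output,
// instead of the unconditional cap used before this change.
console.log(dynamicMaxOutputTokens(64_000, 32_000, 200_000, 180_000)) // 16000
```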
65 changes: 61 additions & 4 deletions packages/opencode/src/session/compaction.ts
@@ -14,6 +14,9 @@ import { fn } from "@/util/fn"
import { Agent } from "@/agent/agent"
import { Plugin } from "@/plugin"
import { Config } from "@/config/config"
import { LLM } from "./llm"
import { SystemPrompt } from "./system"
import type { ModelMessage } from "ai"

export namespace SessionCompaction {
const log = Log.create({ service: "session.compaction" })
@@ -30,12 +33,66 @@
export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
const config = await Config.get()
if (config.compaction?.auto === false) return false
const context = input.model.limit.context
if (context === 0) return false
const modelContextLimit = input.model.limit.context
if (modelContextLimit === 0) return false

// Use configured maxContext if provided, otherwise use model's context limit
const maxContext = config.compaction?.maxContext
const context = maxContext ? Math.min(maxContext, modelContextLimit) : modelContextLimit

// Use configured threshold (default: 0.9 = 90%)
const threshold = config.compaction?.threshold ?? 0.9

const count = input.tokens.input + input.tokens.cache.read + input.tokens.output
const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX
const usable = input.model.limit.input || context - output
return count > usable

// When maxContext is set, use it to calculate usable; otherwise use input limit if available
const usable = maxContext
? Math.min(input.model.limit.input || context, context) - output
: input.model.limit.input || context - output
return count > usable * threshold
}

/**
* Check if estimated tokens exceed threshold, used by pre-check and post-check.
*
* Context limit determination:
* 1. Get model's maximum context (from model.limit.input or model.limit.context)
* 2. If user set compaction.maxContext, use the smaller of the two
*
* Example:
* - Model supports: 2M tokens
* - User set maxContext: 100k tokens
* - Actual limit used: 100k tokens (user override)
*
* @returns needed=true if estimatedTokens > contextLimit * threshold
*/
export async function shouldCompact(input: {
model: Provider.Model
agent: Agent.Info
messages: ModelMessage[]
}): Promise<{ needed: boolean; estimatedTokens: number; contextLimit: number; threshold: number }> {
const config = await Config.get()
const compactionThreshold = config.compaction?.threshold ?? 0.9
const maxContext = config.compaction?.maxContext
const modelContextLimit = input.model.limit.input || input.model.limit.context

if (!modelContextLimit) {
return { needed: false, estimatedTokens: 0, contextLimit: 0, threshold: compactionThreshold }
}

// Use the smaller value: user's maxContext or model's limit
// This allows users to cap context usage on large models for cost control
const contextLimit = maxContext ? Math.min(maxContext, modelContextLimit) : modelContextLimit
const system = await SystemPrompt.build({ model: input.model, agent: input.agent })
const estimatedTokens = LLM.estimateInputTokens(input.messages, system)

return {
needed: estimatedTokens > contextLimit * compactionThreshold,
estimatedTokens,
contextLimit,
threshold: compactionThreshold,
}
}

export const PRUNE_MINIMUM = 20_000
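As a sanity check on the example in the doc comment, the decision shouldCompact makes reduces to the following (illustrative helper, not part of the module):

```ts
function needsCompaction(estimatedTokens: number, modelLimit: number, maxContext?: number, threshold = 0.9): boolean {
  const contextLimit = maxContext ? Math.min(maxContext, modelLimit) : modelLimit
  return estimatedTokens > contextLimit * threshold
}

console.log(needsCompaction(95_000, 2_000_000, 100_000)) // true: 95k > 100k * 0.9
console.log(needsCompaction(80_000, 2_000_000, 100_000)) // false: 80k <= 90k
```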
36 changes: 36 additions & 0 deletions packages/opencode/src/session/llm.ts
@@ -30,6 +30,36 @@ export namespace LLM {

export const OUTPUT_TOKEN_MAX = Flag.OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX || 32_000

// Estimate input tokens from messages and system prompt
// Uses standard tokenization estimate: ~4 characters per token for English text
export function estimateInputTokens(messages: ModelMessage[], systemPrompt: string[]): number {
let totalChars = 0

// Count system prompt
for (const sys of systemPrompt) {
totalChars += sys.length
}

// Count all messages
for (const msg of messages) {
if (typeof msg.content === "string") {
totalChars += msg.content.length
} else if (Array.isArray(msg.content)) {
for (const part of msg.content) {
if ("text" in part && typeof part.text === "string") {
totalChars += part.text.length
} else if ("image" in part) {
// Approximate image tokens (roughly 2000 tokens per image)
totalChars += 2000 * 4 // Convert to chars for consistent calculation
}
}
}
}

// Standard estimate: ~4 chars per token
return Math.ceil(totalChars / 4)
}

export type StreamInput = {
user: MessageV2.User
sessionID: string
@@ -131,13 +161,19 @@
},
)

// Estimate input tokens for dynamic max_tokens calculation
const estimatedInput = estimateInputTokens(input.messages, system)
const contextWindow = input.model.limit.input || input.model.limit.context

const maxOutputTokens = isCodex
? undefined
: ProviderTransform.maxOutputTokens(
input.model.api.npm,
params.options,
input.model.limit.output,
OUTPUT_TOKEN_MAX,
contextWindow,
estimatedInput,
)

const tools = await resolveTools(input)
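The heuristic is intentionally rough (~4 characters per token). A self-contained illustration, using simplified message shapes rather than the `ModelMessage` type from the `ai` package:

```ts
const system = ["You are a helpful assistant."]
const messages = [{ role: "user", content: "Summarize the design doc in three bullet points." }]

const totalChars =
  system.reduce((n, s) => n + s.length, 0) +
  messages.reduce((n, m) => n + (typeof m.content === "string" ? m.content.length : 0), 0)

// 76 characters in total, so the estimate is 76 / 4 = 19 tokens.
console.log(Math.ceil(totalChars / 4)) // 19
```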
50 changes: 50 additions & 0 deletions packages/opencode/src/session/processor.ts
@@ -20,6 +20,28 @@ export namespace SessionProcessor {
const DOOM_LOOP_THRESHOLD = 3
const log = Log.create({ service: "session.processor" })

// Detect context window overflow errors from various providers
function isContextWindowError(error: any): boolean {
const message = error?.message?.toLowerCase() || ""
const errorCode = error?.code?.toLowerCase() || ""

// Check common context window error patterns
const patterns = [
"context_length_exceeded",
"context window",
"context limit",
"maximum context length",
"token limit",
"too many tokens",
"request too large",
"prompt is too long",
"input is too long",
"exceeds the model's maximum",
]

return patterns.some((pattern) => message.includes(pattern) || errorCode.includes(pattern))
}

export type Info = Awaited<ReturnType<typeof create>>
export type Result = Awaited<ReturnType<Info["process"]>>

@@ -189,6 +211,26 @@
})

delete toolcalls[value.toolCallId]

// Check if tool result might cause context overflow
const msgs = await Session.messages({ sessionID: input.sessionID })
const modelMessages = MessageV2.toModelMessage(msgs.map((m) => ({ info: m.info, parts: m.parts })))
const agent = await Agent.get(input.assistantMessage.agent)
const check = await SessionCompaction.shouldCompact({
model: input.model,
agent,
messages: modelMessages,
})

if (check.needed) {
log.info("context overflow after tool execution", {
tool: match.tool,
estimatedTokens: check.estimatedTokens,
contextLimit: check.contextLimit,
threshold: check.threshold,
})
needsCompaction = true
}
}
break
}
@@ -341,6 +383,14 @@
error: e,
stack: JSON.stringify(e.stack),
})

// Check for context window overflow errors and trigger compaction
if (isContextWindowError(e)) {
log.info("context window overflow detected, triggering compaction")
needsCompaction = true
break
}

const error = MessageV2.fromError(e, { providerID: input.model.providerID })
const retry = SessionRetry.retryable(error)
if (retry !== undefined) {
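A quick, self-contained illustration of the pattern matching in isContextWindowError, applied to a made-up provider error (the message text is an assumption, not a captured log):

```ts
const patterns = ["context_length_exceeded", "maximum context length", "prompt is too long"]
const sampleError = { message: "This model's maximum context length is 200000 tokens.", code: "" }

const matched = patterns.some(
  (p) => sampleError.message.toLowerCase().includes(p) || sampleError.code.toLowerCase().includes(p),
)
console.log(matched) // true — such an error sets needsCompaction instead of going through retry
```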
23 changes: 22 additions & 1 deletion packages/opencode/src/session/prompt.ts
@@ -33,6 +33,7 @@ import { spawn } from "child_process"
import { Command } from "../command"
import { $, fileURLToPath } from "bun"
import { ConfigMarkdown } from "../config/markdown"
import { Config } from "../config/config"
import { SessionSummary } from "./summary"
import { NamedError } from "@opencode-ai/util/error"
import { fn } from "@/util/fn"
@@ -507,8 +508,27 @@
continue
}

// normal processing
// Pre-check: estimate input tokens before API call to prevent overflow errors
const agent = await Agent.get(lastUser.agent)
const modelMessages = MessageV2.toModelMessage(msgs)
const check = await SessionCompaction.shouldCompact({ model, agent, messages: modelMessages })

if (check.needed) {
log.info("pre-check overflow", {
estimatedTokens: check.estimatedTokens,
contextLimit: check.contextLimit,
threshold: check.threshold,
})
await SessionCompaction.create({
sessionID,
agent: lastUser.agent,
model: lastUser.model,
auto: true,
})
continue
}

// normal processing
const maxSteps = agent.steps ?? Infinity
const isLastStep = step >= maxSteps
msgs = await insertReminders({
@@ -688,6 +708,7 @@
for (const item of await ToolRegistry.tools(
{ modelID: input.model.api.id, providerID: input.model.providerID },
input.agent,
input.model,
)) {
const schema = ProviderTransform.schema(input.model, z.toJSONSchema(item.parameters))
tools[item.id] = tool({
15 changes: 15 additions & 0 deletions packages/opencode/src/session/system.ts
@@ -17,6 +17,7 @@ import PROMPT_CODEX from "./prompt/codex.txt"
import PROMPT_CODEX_INSTRUCTIONS from "./prompt/codex_header.txt"
import type { Provider } from "@/provider/provider"
import { Flag } from "@/flag/flag"
import type { Agent } from "@/agent/agent"

export namespace SystemPrompt {
export function header(providerID: string) {
@@ -37,6 +38,20 @@
return [PROMPT_ANTHROPIC_WITHOUT_TODO]
}

export async function build(input: { model: Provider.Model; agent: Agent.Info }): Promise<string[]> {
const system = header(input.model.providerID)
system.push(
[
...(input.agent.prompt ? [input.agent.prompt] : provider(input.model)),
...(await environment()),
...(await custom()),
]
.filter((x) => x)
.join("\n"),
)
return system
}

export async function environment() {
const project = Instance.project
return [