From bb7056b905ae7a14af1e7c22d18d22ba88bbed08 Mon Sep 17 00:00:00 2001
From: Felipe Madero
Date: Mon, 26 Jan 2026 23:06:49 -0300
Subject: [PATCH 1/2] feat(provider): auto-detect Ollama context limits

Query the Ollama API to get model context limits (num_ctx) for proper
context percentage display in the status bar.

Detects Ollama servers by checking whether the root endpoint returns
"Ollama is running", then fetches model info via /api/show. Falls back
to the 4096 default.
---
 packages/opencode/src/provider/provider.ts | 47 ++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/packages/opencode/src/provider/provider.ts b/packages/opencode/src/provider/provider.ts
index ee7ee75c9f5..28bca25a272 100644
--- a/packages/opencode/src/provider/provider.ts
+++ b/packages/opencode/src/provider/provider.ts
@@ -41,6 +41,41 @@ import { ProviderTransform } from "./transform"
 export namespace Provider {
   const log = Log.create({ service: "provider" })
 
+  const DEFAULT_OLLAMA_CONTEXT = 4096
+
+  async function isOllamaServer(baseURL: string): Promise<boolean> {
+    try {
+      const ollamaBase = baseURL.replace(/\/v1\/?$/, "")
+      const response = await fetch(ollamaBase, {
+        signal: AbortSignal.timeout(2000),
+      })
+      if (!response.ok) return false
+      const text = await response.text()
+      return text === "Ollama is running"
+    } catch {
+      return false
+    }
+  }
+
+  async function fetchOllamaModelContext(baseURL: string, modelName: string): Promise<number> {
+    try {
+      const ollamaBase = baseURL.replace(/\/v1\/?$/, "")
+      const response = await fetch(`${ollamaBase}/api/show`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ name: modelName }),
+        signal: AbortSignal.timeout(2000),
+      })
+      if (!response.ok) return DEFAULT_OLLAMA_CONTEXT
+      const data = (await response.json()) as { parameters?: string }
+      const match = data.parameters?.match(/num_ctx\s+(\d+)/)
+      if (match) return parseInt(match[1], 10)
+    } catch {
+      // Error querying Ollama - use default
+    }
+    return DEFAULT_OLLAMA_CONTEXT
+  }
+
   function isGpt5OrLater(modelID: string): boolean {
     const match = /^gpt-(\d+)/.exec(modelID)
     if (!match) {
@@ -806,6 +841,18 @@
         parsed.models[modelID] = parsedModel
       }
       database[providerID] = parsed
+
+      // Fetch context limits from the Ollama API if this is an Ollama server.
+      // Only query Ollama if no limit was configured (config takes priority).
+      if (parsed.options.baseURL && (await isOllamaServer(parsed.options.baseURL))) {
+        const contextFetches = Object.entries(parsed.models).map(async ([modelID, model]) => {
+          if (model.limit.context === 0) {
+            const context = await fetchOllamaModelContext(parsed.options.baseURL, modelID)
+            model.limit.context = context
+          }
+        })
+        await Promise.all(contextFetches)
+      }
     }
 
     // load env

From 10aa64bf982017a9820f12085a389ade3bd720d7 Mon Sep 17 00:00:00 2001
From: Felipe Madero
Date: Mon, 26 Jan 2026 23:51:22 -0300
Subject: [PATCH 2/2] fix(compaction): handle models without output limit

When limit.output is 0, fall back to reserving 10% of context (capped
at OUTPUT_TOKEN_MAX) instead of the hardcoded 32000. This fixes
compaction triggering immediately on small-context models like 16k
Ollama models.
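For example, on a 16k Ollama model with no configured output limit
(assuming context = 16384, limit.output = 0, no limit.input, and
OUTPUT_TOKEN_MAX = 32000 as the hardcoded value mentioned above), the
old and new fallbacks roughly work out as:

    // Illustrative numbers only; the limits above are assumptions.
    const OUTPUT_TOKEN_MAX = 32000
    const OUTPUT_RESERVE_RATIO = 0.1
    const context = 16384
    const limitOutput = 0

    // Old: Math.min(0, 32000) is 0 (falsy), so output becomes 32000.
    const oldOutput = Math.min(limitOutput, OUTPUT_TOKEN_MAX) || OUTPUT_TOKEN_MAX // 32000
    const oldUsable = context - oldOutput // -15616: any token count overflows

    // New: reserve 10% of context, still capped at OUTPUT_TOKEN_MAX.
    const newOutput =
      Math.min(limitOutput, OUTPUT_TOKEN_MAX) ||
      Math.min(Math.floor(context * OUTPUT_RESERVE_RATIO), OUTPUT_TOKEN_MAX) // 1638
    const newUsable = context - newOutput // 14746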
---
 packages/opencode/src/session/compaction.ts | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts
index fb382530291..8f9044f2c06 100644
--- a/packages/opencode/src/session/compaction.ts
+++ b/packages/opencode/src/session/compaction.ts
@@ -27,13 +27,15 @@ export namespace SessionCompaction {
     ),
   }
 
+  const OUTPUT_RESERVE_RATIO = 0.10
+
   export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
     const config = await Config.get()
     if (config.compaction?.auto === false) return false
     const context = input.model.limit.context
     if (context === 0) return false
     const count = input.tokens.input + input.tokens.cache.read + input.tokens.output
-    const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX
+    const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || Math.min(Math.floor(context * OUTPUT_RESERVE_RATIO), SessionPrompt.OUTPUT_TOKEN_MAX)
     const usable = input.model.limit.input || context - output
     return count > usable
   }
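For reference on PATCH 1/2, the num_ctx parsing works against the
plain-text "parameters" blob returned by /api/show. A minimal sketch
against an assumed sample response (shape and values are illustrative,
not taken from the patch):

    const sample = {
      parameters: "num_ctx                        16384\ntemperature                    0.7",
    }
    const match = sample.parameters.match(/num_ctx\s+(\d+)/)
    // num_ctx can be absent (e.g. if the Modelfile never set it), which is
    // why the patch falls back to DEFAULT_OLLAMA_CONTEXT = 4096.
    const context = match ? parseInt(match[1], 10) : 4096
    console.log(context) // 16384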