diff --git a/docs/plans/2026-03-01-context-1m-auto-escalation-design.md b/docs/plans/2026-03-01-context-1m-auto-escalation-design.md new file mode 100644 index 00000000000..373b09177a8 --- /dev/null +++ b/docs/plans/2026-03-01-context-1m-auto-escalation-design.md @@ -0,0 +1,98 @@ +# Design: Session-Aware Auto-Escalation for 1M Context + +## Problem + +The branch adds `context-1m-2025-08-07` to the Anthropic `anthropic-beta` header unconditionally (`provider.ts` line 126). This causes HTTP 400 errors for accounts below Tier 4: `"The long context beta is not yet available for this subscription."` It also enables 2× input / 1.5× output pricing for requests exceeding 200K tokens, even when conversations are small. + +## Solution + +Session-aware auto-escalation: only send the `context-1m-2025-08-07` beta header when the model supports 1M context AND the session actually needs it. + +## Config + +Add `context1m` to provider options: + +```typescript +context1m: z.union([z.literal("auto"), z.boolean()]).optional() +``` + +```jsonc +// opencode.json +{ "provider": { "anthropic": { "options": { "context1m": "auto" } } } } +``` + +- `"auto"` (default): enable header only when model supports 1M AND session input tokens exceed 150K +- `true`: always send header for models that support 1M context +- `false`: never send header + +## Decision Logic + +Three conditions determine whether the header is sent (in `"auto"` mode, all must be true): + +1. **Model supports it**: `model.limit.context > 200_000` +2. **Session needs it**: accumulated input tokens > 150K (75% of 200K threshold) +3. **Config allows it**: `context1m !== false` + +For `true` mode: only condition 1 is checked. +For `false` mode: never send. + +The model's declared `limit.context` is the capability signal. Users who set `limit.context: 1000000` on a model in their config (e.g., `claude-opus-4-6`) are opting in to 1M support for that model. Models with 200K limits (Haiku, older models) never get the header. + +## Implementation + +### Touch Points + +1. **`provider.ts` — Anthropic loader** (CUSTOM_LOADERS, line 126): Remove `context-1m-2025-08-07` from the static beta header string. Keep `claude-code-20250219`, `interleaved-thinking-2025-05-14`, `fine-grained-tool-streaming-2025-05-14`, and `adaptive-thinking-2026-01-28`. + +2. **`provider.ts` — Module-level state**: Add a boolean flag and setter for the session layer to communicate with the fetch wrapper. + + ```typescript + let _context1m = false + export function setContext1m(enabled: boolean) { + _context1m = enabled + } + ``` + +3. **`provider.ts` — Fetch wrapper** (in `getSDK()`, ~line 1073): For Anthropic requests (check `model.providerID === "anthropic"` or `model.api.npm === "@ai-sdk/anthropic"`), if `_context1m` is true, append `,context-1m-2025-08-07` to the `anthropic-beta` request header. + +4. **`session/llm.ts`** — Before each LLM call: Read the provider config, check the model's context limit, check accumulated session tokens, and call `Provider.setContext1m()`. + + ```typescript + const config = provider.options?.context1m ?? "auto" + const supports1m = model.limit.context > 200_000 + const needs1m = lastUsage.tokens.input > 150_000 + Provider.setContext1m(config === true ? supports1m : config === false ? false : supports1m && needs1m) + ``` + +5. **`config.ts`** — Provider options schema: Add `context1m` to the options object with the union type. + +### Console (`packages/console`) + +The console's `anthropic.ts` already conditionally applies the header based on model name (`supports1m = reqModel.includes("sonnet") || reqModel.includes("opus-4-6")`). This is a separate package and can be updated independently to also respect a config option if desired. + +## Edge Cases + +| Scenario | Behavior | +| ---------------------------------- | ------------------------------------------------------------------------------------------------------- | +| New session, any model | No header — safe for all tiers | +| Opus 4.6 at 180K tokens, auto mode | Header enabled — can grow to 1M | +| Haiku at any token count | Never gets header (200K context limit) | +| Sub-Tier-4, small conversation | No header — works fine | +| Sub-Tier-4, Opus 4.6 at 180K | Header enabled, API returns Tier error. Separate fallback work (see below) handles graceful degradation | +| `context1m: false`, any model | Never sends header, hard 200K limit | +| `context1m: true`, Opus 4.6 at 10K | Header sent. No cost impact — premium pricing only triggers when total input >200K | +| `context1m: true`, Haiku | No header — model doesn't support 1M (context limit ≤200K) | + +## Related Work + +A separate agent is working on runtime fallback for auth/billing errors (`~/.agent-mail/long-context`). That work makes the error recoverable (fall back to another model). Our work prevents the error from occurring in the first place. Both are complementary. + +## Pricing Reference + +The `context-1m` header alone doesn't change pricing. Premium rates only apply when total input tokens (including cache) exceed 200K: + +- Input: 2× standard rate +- Output: 1.5× standard rate +- Cache read/write: proportional increase + +This is why auto-escalation saves money — the header is only present when you'd hit the premium tier anyway. diff --git a/docs/plans/2026-03-01-context-1m-auto-escalation.md b/docs/plans/2026-03-01-context-1m-auto-escalation.md new file mode 100644 index 00000000000..a203d80a6dc --- /dev/null +++ b/docs/plans/2026-03-01-context-1m-auto-escalation.md @@ -0,0 +1,225 @@ +# 1M Context Error-Retry Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Gracefully handle Anthropic's "long context beta not available" error by retrying without the `context-1m` header, then remembering to skip it for the process lifetime. Zero config needed. + +**Architecture:** Keep the `context-1m-2025-08-07` beta header in the static Anthropic loader (already present on this branch). In the fetch wrapper inside `getSDK()`, detect the specific Tier error from the response, retry the request without the header, and set a process-level flag to skip it on future requests. + +**Tech Stack:** TypeScript, Vercel AI SDK + +**Design doc:** `docs/plans/2026-03-01-context-1m-auto-escalation-design.md` + +--- + +### Task 1: Add Error-Retry Logic to the Fetch Wrapper + +**Files:** + +- Modify: `packages/opencode/src/provider/provider.ts` + +**Context:** The fetch wrapper is at line 1073 inside `getSDK()`. It's a closure that captures `model` from the outer scope. The `anthropic-beta` header including `context-1m-2025-08-07` is set statically in `CUSTOM_LOADERS["anthropic"]` at line 126. + +**Step 1: Add process-level disabled flag** + +At the top of the `Provider` namespace (after the `log` declaration, around line 49), add: + +```typescript +let _context1mDisabled = false +``` + +**Step 2: Add retry logic in the fetch wrapper** + +In the fetch wrapper (`options["fetch"] = async (input, init) => {`, line 1073), replace the final return statement. Currently (line 1106-1110): + +```typescript +return fetchFn(input, { + ...opts, + // @ts-ignore see here: https://github.com/oven-sh/bun/issues/16682 + timeout: false, +}) +``` + +Replace with: + +```typescript +const response = await fetchFn(input, { + ...opts, + // @ts-ignore see here: https://github.com/oven-sh/bun/issues/16682 + timeout: false, +}) + +// Detect Anthropic "long context beta not available" error and retry without the header +if (!_context1mDisabled && model.api.npm === "@ai-sdk/anthropic" && response.status === 400) { + const cloned = response.clone() + const body = await cloned.json().catch(() => null) + if ( + body?.error?.type === "invalid_request_error" && + typeof body?.error?.message === "string" && + body.error.message.toLowerCase().includes("long context") + ) { + log.info("context-1m beta not available, retrying without it") + _context1mDisabled = true + const headers = new Headers(opts.headers as HeadersInit) + const beta = headers.get("anthropic-beta") ?? "" + headers.set( + "anthropic-beta", + beta + .split(",") + .filter((h) => !h.includes("context-1m")) + .join(","), + ) + return fetchFn(input, { + ...opts, + headers, + // @ts-ignore + timeout: false, + }) + } +} + +return response +``` + +**Step 3: Strip `context-1m` from future requests when disabled** + +At the top of the fetch wrapper (after `const opts = init ?? {}`, line 1076), add: + +```typescript +// Skip context-1m header if previously detected as unavailable +if (_context1mDisabled && model.api.npm === "@ai-sdk/anthropic") { + const headers = new Headers(opts.headers as HeadersInit) + const beta = headers.get("anthropic-beta") ?? "" + if (beta.includes("context-1m")) { + headers.set( + "anthropic-beta", + beta + .split(",") + .filter((h) => !h.includes("context-1m")) + .join(","), + ) + opts.headers = headers + } +} +``` + +**Step 4: Verify no type errors** + +Run: `cd packages/opencode && npx tsc --noEmit` +Expected: No new errors + +**Step 5: Describe and advance** + +```bash +jj describe -m "feat(provider): auto-retry without context-1m header when account lacks access" +jj new +``` + +--- + +### Task 2: Tests + +**Files:** + +- Create: `packages/opencode/test/provider/context1m.test.ts` + +**Step 1: Write tests for the retry behavior** + +The retry logic is embedded in the fetch wrapper, which is hard to unit test in isolation. Instead, test the header-stripping logic and the flag behavior: + +```typescript +import { describe, test, expect } from "bun:test" + +describe("context-1m header stripping", () => { + function strip(beta: string) { + return beta + .split(",") + .filter((h) => !h.includes("context-1m")) + .join(",") + } + + test("strips context-1m from beta header", () => { + const header = + "claude-code-20250219,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14,adaptive-thinking-2026-01-28,context-1m-2025-08-07" + expect(strip(header)).toBe( + "claude-code-20250219,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14,adaptive-thinking-2026-01-28", + ) + }) + + test("preserves other headers when context-1m is not present", () => { + const header = "claude-code-20250219,interleaved-thinking-2025-05-14" + expect(strip(header)).toBe("claude-code-20250219,interleaved-thinking-2025-05-14") + }) + + test("handles context-1m as only header", () => { + expect(strip("context-1m-2025-08-07")).toBe("") + }) +}) + +describe("error detection", () => { + test("matches the known Anthropic tier error", () => { + const body = { + error: { + type: "invalid_request_error", + message: "The long context beta is not yet available for this subscription.", + }, + } + const matches = + body.error.type === "invalid_request_error" && + typeof body.error.message === "string" && + body.error.message.toLowerCase().includes("long context") + expect(matches).toBe(true) + }) + + test("does not match unrelated errors", () => { + const body = { + error: { + type: "invalid_request_error", + message: "max_tokens must be less than 8192", + }, + } + const matches = + body.error.type === "invalid_request_error" && + typeof body.error.message === "string" && + body.error.message.toLowerCase().includes("long context") + expect(matches).toBe(false) + }) +}) +``` + +**Step 2: Run the tests** + +Run: `cd packages/opencode && bun test test/provider/context1m.test.ts` +Expected: All tests pass + +**Step 3: Run existing tests for regressions** + +Run: `cd packages/opencode && bun test test/session/compaction.test.ts` +Expected: All tests pass + +**Step 4: Describe and advance** + +```bash +jj describe -m "test(provider): add context-1m retry logic tests" +jj new +``` + +--- + +### Task 3: Verify End-to-End + +**Step 1: Type check the full package** + +Run: `cd packages/opencode && npx tsc --noEmit` +Expected: No errors + +**Step 2: Run the full test suite** + +Run: `cd packages/opencode && bun test` +Expected: All tests pass + +**Step 3: Final describe** + +```bash +jj describe -m "feat(provider): graceful context-1m fallback for sub-Tier-4 accounts" +``` diff --git a/packages/opencode/src/bun/index.ts b/packages/opencode/src/bun/index.ts index e3bddcc2263..3284a555626 100644 --- a/packages/opencode/src/bun/index.ts +++ b/packages/opencode/src/bun/index.ts @@ -54,11 +54,33 @@ export namespace BunProc { }), ) + // For github: dependencies, bun installs under the package's actual name + // (from its package.json "name" field), not under the github: specifier. + // Resolve the real module path by reading the installed package name from + // the cache lockfile. + async function resolveModulePath(pkg: string): Promise { + const nodeModules = path.join(Global.Path.cache, "node_modules") + if (!pkg.startsWith("github:")) return path.join(nodeModules, pkg) + const lockPath = path.join(Global.Path.cache, "bun.lock") + const lock = await Filesystem.readText(lockPath).catch(() => "") + // lockfile maps "actual-name": "github:owner/repo#ref" + for (const line of lock.split("\n")) { + if (line.includes(pkg)) { + const match = line.match(/^\s*"([^"]+)":\s*"/) + if (match && match[1] !== pkg) return path.join(nodeModules, match[1]) + } + } + // Fallback: strip github: prefix and use repo name + const repoName = pkg.replace(/^github:/, "").split("#")[0].split("/").pop() + if (repoName) return path.join(nodeModules, repoName) + return path.join(nodeModules, pkg) + } + export async function install(pkg: string, version = "latest") { // Use lock to ensure only one install at a time using _ = await Lock.write("bun-install") - const mod = path.join(Global.Path.cache, "node_modules", pkg) + const mod = await resolveModulePath(pkg) const pkgjsonPath = path.join(Global.Path.cache, "package.json") const parsed = await Filesystem.readJson<{ dependencies: Record }>(pkgjsonPath).catch(async () => { const result = { dependencies: {} as Record } @@ -89,7 +111,7 @@ export namespace BunProc { ...(proxied() || process.env.CI ? ["--no-cache"] : []), "--cwd", Global.Path.cache, - pkg + "@" + version, + pkg.includes("#") ? pkg : pkg + "@" + version, ] // Let Bun handle registry resolution: @@ -112,11 +134,14 @@ export namespace BunProc { ) }) + // Re-resolve after install in case lockfile changed + const installedMod = await resolveModulePath(pkg) + // Resolve actual version from installed package when using "latest" // This ensures subsequent starts use the cached version until explicitly updated let resolvedVersion = version if (version === "latest") { - const installedPkg = await Filesystem.readJson<{ version?: string }>(path.join(mod, "package.json")).catch( + const installedPkg = await Filesystem.readJson<{ version?: string }>(path.join(installedMod, "package.json")).catch( () => null, ) if (installedPkg?.version) { @@ -126,6 +151,6 @@ export namespace BunProc { parsed.dependencies[pkg] = resolvedVersion await Filesystem.writeJson(pkgjsonPath, parsed) - return mod + return installedMod } } diff --git a/packages/opencode/src/plugin/index.ts b/packages/opencode/src/plugin/index.ts index e65d21bfd60..eef156cd2a1 100644 --- a/packages/opencode/src/plugin/index.ts +++ b/packages/opencode/src/plugin/index.ts @@ -16,7 +16,7 @@ import { gitlabAuthPlugin as GitlabAuthPlugin } from "@gitlab/opencode-gitlab-au export namespace Plugin { const log = Log.create({ service: "plugin" }) - const BUILTIN = ["opencode-anthropic-auth@0.0.13"] + const BUILTIN = ["github:sjawhar/opencode-anthropic-auth#feat/oauth-context-cap"] // Built-in plugins that are directly imported (not installed from npm) const INTERNAL_PLUGINS: PluginInstance[] = [CodexAuthPlugin, CopilotAuthPlugin, GitlabAuthPlugin] diff --git a/packages/opencode/src/provider/provider.ts b/packages/opencode/src/provider/provider.ts index 022ec316795..496253f4943 100644 --- a/packages/opencode/src/provider/provider.ts +++ b/packages/opencode/src/provider/provider.ts @@ -47,6 +47,7 @@ import { Installation } from "../installation" export namespace Provider { const log = Log.create({ service: "provider" }) + let _context1mDisabled = false function isGpt5OrLater(modelID: string): boolean { const match = /^gpt-(\d+)/.exec(modelID) @@ -123,7 +124,7 @@ export namespace Provider { options: { headers: { "anthropic-beta": - "claude-code-20250219,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14", + "claude-code-20250219,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14,adaptive-thinking-2026-01-28,context-1m-2025-08-07", }, }, } @@ -1075,6 +1076,24 @@ export namespace Provider { const fetchFn = customFetch ?? fetch const opts = init ?? {} + // Strip context-1m beta header when account doesn't support it + function stripContext1m(hdrs: HeadersInit) { + const headers = new Headers(hdrs) + const beta = headers.get("anthropic-beta") ?? "" + if (beta.includes("context-1m")) + headers.set( + "anthropic-beta", + beta + .split(",") + .filter((h) => !h.includes("context-1m")) + .join(","), + ) + return headers + } + + if (_context1mDisabled && model.api.npm === "@ai-sdk/anthropic") + opts.headers = stripContext1m(opts.headers as HeadersInit) + if (options["timeout"] !== undefined && options["timeout"] !== null) { const signals: AbortSignal[] = [] if (opts.signal) signals.push(opts.signal) @@ -1103,11 +1122,32 @@ export namespace Provider { } } - return fetchFn(input, { + const response = await fetchFn(input, { ...opts, // @ts-ignore see here: https://github.com/oven-sh/bun/issues/16682 timeout: false, }) + + if (!_context1mDisabled && model.api.npm === "@ai-sdk/anthropic" && response.status === 400) { + const cloned = response.clone() + const body = await cloned.json().catch(() => null) + if ( + body?.error?.type === "invalid_request_error" && + typeof body?.error?.message === "string" && + body.error.message.toLowerCase().includes("long context") + ) { + log.info("context-1m beta not available, retrying without it") + _context1mDisabled = true + const headers = stripContext1m(opts.headers as HeadersInit) + return fetchFn(input, { + ...opts, + headers, + timeout: false, + } as BunFetchRequestInit) + } + } + + return response } const bundledFn = BUNDLED_PROVIDERS[model.api.npm] diff --git a/packages/opencode/src/server/error.ts b/packages/opencode/src/server/error.ts index cc5fa961877..9907bd70adc 100644 --- a/packages/opencode/src/server/error.ts +++ b/packages/opencode/src/server/error.ts @@ -29,6 +29,25 @@ export const ERRORS = { }, }, }, + 409: { + description: "Conflict", + content: { + "application/json": { + schema: resolver( + z + .object({ + name: z.literal("DuplicateIDError"), + data: z.object({ + id: z.string(), + }), + }) + .meta({ + ref: "DuplicateIDError", + }), + ), + }, + }, + }, } as const export function errors(...codes: number[]) { diff --git a/packages/opencode/src/server/routes/session.ts b/packages/opencode/src/server/routes/session.ts index 12938aeaba0..c3998275bdc 100644 --- a/packages/opencode/src/server/routes/session.ts +++ b/packages/opencode/src/server/routes/session.ts @@ -189,7 +189,7 @@ export const SessionRoutes = lazy(() => description: "Create a new OpenCode session for interacting with AI assistants and managing conversations.", operationId: "session.create", responses: { - ...errors(400), + ...errors(400, 409), 200: { description: "Successfully created session", content: { @@ -579,6 +579,36 @@ export const SessionRoutes = lazy(() => return c.json(messages) }, ) + .get( + "/:sessionID/message/count", + describeRoute({ + summary: "Get message count", + description: "Get the total number of messages in a session.", + operationId: "session.messageCount", + responses: { + 200: { + description: "Message count", + content: { + "application/json": { + schema: resolver(z.object({ count: z.number() })), + }, + }, + }, + ...errors(400, 404), + }, + }), + validator( + "param", + z.object({ + sessionID: z.string().meta({ description: "Session ID" }), + }), + ), + async (c) => { + const sessionID = c.req.valid("param").sessionID + const count = await Session.messageCount({ sessionID }) + return c.json({ count }) + }, + ) .get( "/:sessionID/message/:messageID", describeRoute({ diff --git a/packages/opencode/src/server/server.ts b/packages/opencode/src/server/server.ts index 9fba9c1fe1a..2bb1463ea2a 100644 --- a/packages/opencode/src/server/server.ts +++ b/packages/opencode/src/server/server.ts @@ -68,6 +68,7 @@ export namespace Server { if (err instanceof NotFoundError) status = 404 else if (err instanceof Provider.ModelNotFoundError) status = 400 else if (err.name.startsWith("Worktree")) status = 400 + else if (err.name === "DuplicateIDError") status = 409 else status = 500 return c.json(err.toObject(), { status }) } diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts index 22de477f8d1..f7f7f5097c2 100644 --- a/packages/opencode/src/session/index.ts +++ b/packages/opencode/src/session/index.ts @@ -9,8 +9,9 @@ import { Config } from "../config/config" import { Flag } from "../flag/flag" import { Identifier } from "../id/id" import { Installation } from "../installation" +import { NamedError } from "@opencode-ai/util/error" -import { Database, NotFoundError, eq, and, or, gte, isNull, desc, like, inArray, lt } from "../storage/db" +import { Database, NotFoundError, eq, and, or, gte, isNull, desc, like, inArray, lt, sql } from "../storage/db" import type { SQL } from "../storage/db" import { SessionTable, MessageTable, PartTable } from "./session.sql" import { ProjectTable } from "../project/project.sql" @@ -26,7 +27,7 @@ import { Snapshot } from "@/snapshot" import type { Provider } from "@/provider/provider" import { PermissionNext } from "@/permission/next" import { Global } from "@/global" -import type { LanguageModelV2Usage } from "@ai-sdk/provider" +import type { LanguageModelUsage } from "ai" import { iife } from "@/util/iife" export namespace Session { @@ -212,13 +213,22 @@ export namespace Session { export const create = fn( z .object({ + id: Identifier.schema("session").optional(), parentID: Identifier.schema("session").optional(), title: z.string().optional(), permission: Info.shape.permission, }) .optional(), async (input) => { + if (input?.id) { + const existing = await get(input.id).catch((e) => { + if (e instanceof NotFoundError) return undefined + throw e + }) + if (existing) throw new DuplicateIDError({ id: input.id }) + } return createNext({ + id: input?.id, parentID: input?.parentID, directory: Instance.directory, title: input?.title, @@ -525,6 +535,22 @@ export namespace Session { }, ) + export const messageCount = fn( + z.object({ + sessionID: Identifier.schema("session"), + }), + async (input) => { + const result = Database.use((db) => + db + .select({ count: sql`COUNT(*)` }) + .from(MessageTable) + .where(eq(MessageTable.session_id, input.sessionID)) + .get(), + ) + return result?.count ?? 0 + }, + ) + export function* list(input?: { directory?: string roots?: boolean @@ -775,7 +801,7 @@ export namespace Session { export const getUsage = fn( z.object({ model: z.custom(), - usage: z.custom(), + usage: z.custom(), metadata: z.custom().optional(), }), (input) => { @@ -787,23 +813,31 @@ export namespace Session { const outputTokens = safe(input.usage.outputTokens ?? 0) const reasoningTokens = safe(input.usage.reasoningTokens ?? 0) - const cacheReadInputTokens = safe(input.usage.cachedInputTokens ?? 0) + // SDK v6: forward-compat for inputTokenDetails (not yet in @ai-sdk/provider types) + const usage = input.usage as LanguageModelUsage & { + inputTokenDetails?: { noCacheTokens?: number; cacheReadTokens?: number; cacheWriteTokens?: number } + } + + const cacheReadInputTokens = safe( + usage.inputTokenDetails?.cacheReadTokens ?? input.usage.cachedInputTokens ?? 0, + ) const cacheWriteInputTokens = safe( - (input.metadata?.["anthropic"]?.["cacheCreationInputTokens"] ?? - // @ts-expect-error - input.metadata?.["bedrock"]?.["usage"]?.["cacheWriteInputTokens"] ?? - // @ts-expect-error - input.metadata?.["venice"]?.["usage"]?.["cacheCreationInputTokens"] ?? - 0) as number, + usage.inputTokenDetails?.cacheWriteTokens ?? + ((input.metadata?.["anthropic"]?.["cacheCreationInputTokens"] ?? + // @ts-expect-error + input.metadata?.["bedrock"]?.["usage"]?.["cacheWriteInputTokens"] ?? + // @ts-expect-error + input.metadata?.["venice"]?.["usage"]?.["cacheCreationInputTokens"] ?? + 0) as number), ) - // OpenRouter provides inputTokens as the total count of input tokens (including cached). - // AFAIK other providers (OpenRouter/OpenAI/Gemini etc.) do it the same way e.g. vercel/ai#8794 (comment) - // Anthropic does it differently though - inputTokens doesn't include cached tokens. - // It looks like OpenCode's cost calculation assumes all providers return inputTokens the same way Anthropic does (I'm guessing getUsage logic was originally implemented with anthropic), so it's causing incorrect cost calculation for OpenRouter and others. - const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"]) + // SDK v6: inputTokens is now the TOTAL (including cache). Use noCacheTokens when available. + // Fallback: OpenRouter/OpenAI/Gemini include cache in inputTokens, Anthropic/Bedrock don't. const adjustedInputTokens = safe( - excludesCachedTokens ? inputTokens : inputTokens - cacheReadInputTokens - cacheWriteInputTokens, + usage.inputTokenDetails?.noCacheTokens ?? + (!!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"]) + ? inputTokens + : inputTokens - cacheReadInputTokens - cacheWriteInputTokens), ) const total = iife(() => { @@ -857,6 +891,13 @@ export namespace Session { } } + export const DuplicateIDError = NamedError.create( + "DuplicateIDError", + z.object({ + id: z.string(), + }), + ) + export const initialize = fn( z.object({ sessionID: Identifier.schema("session"), diff --git a/packages/opencode/test/provider/context1m.test.ts b/packages/opencode/test/provider/context1m.test.ts new file mode 100644 index 00000000000..bd68ef80514 --- /dev/null +++ b/packages/opencode/test/provider/context1m.test.ts @@ -0,0 +1,90 @@ +import { describe, test, expect } from "bun:test" + +// Test the header-stripping logic extracted from provider.ts +function stripContext1m(beta: string) { + return beta + .split(",") + .filter((h) => !h.includes("context-1m")) + .join(",") +} + +describe("context-1m header stripping", () => { + test("strips context-1m from beta header", () => { + const header = + "claude-code-20250219,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14,adaptive-thinking-2026-01-28,context-1m-2025-08-07" + expect(stripContext1m(header)).toBe( + "claude-code-20250219,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14,adaptive-thinking-2026-01-28", + ) + }) + + test("preserves other headers when context-1m is not present", () => { + const header = "claude-code-20250219,interleaved-thinking-2025-05-14" + expect(stripContext1m(header)).toBe("claude-code-20250219,interleaved-thinking-2025-05-14") + }) + + test("handles context-1m as only header", () => { + expect(stripContext1m("context-1m-2025-08-07")).toBe("") + }) +}) + +describe("error detection", () => { + function matches(body: any) { + return ( + body?.error?.type === "invalid_request_error" && + typeof body?.error?.message === "string" && + body.error.message.toLowerCase().includes("long context") + ) + } + + test("matches the known Anthropic tier error", () => { + expect( + matches({ + error: { + type: "invalid_request_error", + message: "The long context beta is not yet available for this subscription.", + }, + }), + ).toBe(true) + }) + + test("matches variant wording", () => { + expect( + matches({ + error: { + type: "invalid_request_error", + message: "Extra usage is required for long context requests", + }, + }), + ).toBe(true) + }) + + test("does not match unrelated invalid_request_error", () => { + expect( + matches({ + error: { + type: "invalid_request_error", + message: "max_tokens must be less than 8192", + }, + }), + ).toBe(false) + }) + + test("does not match different error type", () => { + expect( + matches({ + error: { + type: "authentication_error", + message: "long context issue", + }, + }), + ).toBe(false) + }) + + test("handles null body", () => { + expect(matches(null)).toBe(false) + }) + + test("handles missing error field", () => { + expect(matches({})).toBe(false) + }) +}) diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts index 452926d12e1..8dd06200277 100644 --- a/packages/opencode/test/session/compaction.test.ts +++ b/packages/opencode/test/session/compaction.test.ts @@ -7,6 +7,7 @@ import { Log } from "../../src/util/log" import { tmpdir } from "../fixture/fixture" import { Session } from "../../src/session" import type { Provider } from "../../src/provider/provider" +import type { LanguageModelUsage } from "ai" Log.init({ print: false }) @@ -297,16 +298,18 @@ describe("session.getUsage", () => { expect(result.tokens.cache.write).toBe(300) }) - test("does not subtract cached tokens for anthropic provider", () => { + test("uses noCacheTokens for anthropic provider in SDK v6", () => { const model = createModel({ context: 100_000, output: 32_000 }) const result = Session.getUsage({ model, usage: { - inputTokens: 1000, + inputTokens: 1200, outputTokens: 500, - totalTokens: 1500, + totalTokens: 1700, cachedInputTokens: 200, - }, + inputTokenDetails: { noCacheTokens: 1000, cacheReadTokens: 200, cacheWriteTokens: undefined }, + outputTokenDetails: { textTokens: undefined, reasoningTokens: undefined }, + } as LanguageModelUsage, metadata: { anthropic: {}, }, @@ -383,6 +386,8 @@ describe("session.getUsage", () => { // excluding cache read/write. totalTokens: 1500, cachedInputTokens: 200, + inputTokenDetails: { noCacheTokens: undefined, cacheReadTokens: 200, cacheWriteTokens: undefined }, + outputTokenDetails: { textTokens: undefined, reasoningTokens: undefined }, } if (npm === "@ai-sdk/amazon-bedrock") { const result = Session.getUsage({ diff --git a/packages/opencode/test/session/session.test.ts b/packages/opencode/test/session/session.test.ts index 219cef12713..fe2feb3e640 100644 --- a/packages/opencode/test/session/session.test.ts +++ b/packages/opencode/test/session/session.test.ts @@ -4,6 +4,7 @@ import { Session } from "../../src/session" import { Bus } from "../../src/bus" import { Log } from "../../src/util/log" import { Instance } from "../../src/project/instance" +import { Identifier } from "../../src/id/id" const projectRoot = path.join(__dirname, "../..") Log.init({ print: false }) @@ -69,3 +70,55 @@ describe("session.started event", () => { }) }) }) + +describe("session.create custom id", () => { + test("custom ID accepted", async () => { + await Instance.provide({ + directory: projectRoot, + fn: async () => { + const customId = Identifier.descending("session") + const session = await Session.create({ id: customId }) + + expect(session.id).toBe(customId) + + await Session.remove(session.id) + }, + }) + }) + + test("default behavior preserved", async () => { + await Instance.provide({ + directory: projectRoot, + fn: async () => { + const session = await Session.create({}) + + expect(session.id).toMatch(/^ses_[0-9a-f]{12}[0-9A-Za-z]{14}$/) + + await Session.remove(session.id) + }, + }) + }) + + test("duplicate ID returns error", async () => { + await Instance.provide({ + directory: projectRoot, + fn: async () => { + const customId = Identifier.descending("session") + const session = await Session.create({ id: customId }) + + await expect(Session.create({ id: customId })).rejects.toThrow("DuplicateIDError") + + await Session.remove(session.id) + }, + }) + }) + + test("invalid prefix rejected", async () => { + await Instance.provide({ + directory: projectRoot, + fn: async () => { + expect(() => Session.create({ id: "bad_a1b2c3d4e5f6AbCdEfGhIjKlMn" })).toThrow() + }, + }) + }) +}) diff --git a/packages/opencode/test/session/structured-output.test.ts b/packages/opencode/test/session/structured-output.test.ts index 2be4257dc78..5eb0b9c2993 100644 --- a/packages/opencode/test/session/structured-output.test.ts +++ b/packages/opencode/test/session/structured-output.test.ts @@ -362,21 +362,26 @@ describe("structured-output.createStructuredOutputTool", () => { expect(inputSchema.jsonSchema?.properties?.tags?.items?.type).toBe("string") }) - test("toModelOutput returns text value", () => { + test("toModelOutput returns text value", async () => { const tool = SessionPrompt.createStructuredOutputTool({ schema: { type: "object" }, onSuccess: () => {}, }) expect(tool.toModelOutput).toBeDefined() - const modelOutput = tool.toModelOutput!({ - output: "Test output", - title: "Test", - metadata: { valid: true }, - }) + const modelOutput = await Promise.resolve( + tool.toModelOutput!({ + toolCallId: "test-call-id", + input: {}, + output: { + output: "Test output", + title: "Test", + metadata: { valid: true }, + }, + }), + ) expect(modelOutput.type).toBe("text") - expect(modelOutput.value).toBe("Test output") }) // Note: Retry behavior is handled by the AI SDK and the prompt loop, not the tool itself diff --git a/packages/sdk/js/src/v2/gen/sdk.gen.ts b/packages/sdk/js/src/v2/gen/sdk.gen.ts index 6165c0f7b09..07c8e3a4263 100644 --- a/packages/sdk/js/src/v2/gen/sdk.gen.ts +++ b/packages/sdk/js/src/v2/gen/sdk.gen.ts @@ -117,6 +117,8 @@ import type { SessionInitErrors, SessionInitResponses, SessionListResponses, + SessionMessageCountErrors, + SessionMessageCountResponses, SessionMessageErrors, SessionMessageResponses, SessionMessagesErrors, @@ -1021,6 +1023,7 @@ export class Session2 extends HeyApiClient { public create( parameters?: { directory?: string + id?: string parentID?: string title?: string permission?: PermissionRuleset @@ -1033,6 +1036,7 @@ export class Session2 extends HeyApiClient { { args: [ { in: "query", key: "directory" }, + { in: "body", key: "id" }, { in: "body", key: "parentID" }, { in: "body", key: "title" }, { in: "body", key: "permission" }, @@ -1563,6 +1567,36 @@ export class Session2 extends HeyApiClient { }) } + /** + * Get message count + * + * Get the total number of messages in a session. + */ + public messageCount( + parameters: { + sessionID: string + directory?: string + }, + options?: Options, + ) { + const params = buildClientParams( + [parameters], + [ + { + args: [ + { in: "path", key: "sessionID" }, + { in: "query", key: "directory" }, + ], + }, + ], + ) + return (options?.client ?? this.client).get({ + url: "/session/{sessionID}/message/count", + ...options, + ...params, + }) + } + /** * Delete message * diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts index be6c00cf445..4120568ac34 100644 --- a/packages/sdk/js/src/v2/gen/types.gen.ts +++ b/packages/sdk/js/src/v2/gen/types.gen.ts @@ -1682,6 +1682,13 @@ export type McpResource = { client: string } +export type DuplicateIdError = { + name: "DuplicateIDError" + data: { + id: string + } +} + export type TextPartInput = { id?: string type: "text" @@ -2605,6 +2612,7 @@ export type SessionListResponse = SessionListResponses[keyof SessionListResponse export type SessionCreateData = { body?: { + id?: string parentID?: string title?: string permission?: PermissionRuleset @@ -2621,6 +2629,10 @@ export type SessionCreateErrors = { * Bad request */ 400: BadRequestError + /** + * Conflict + */ + 409: DuplicateIdError } export type SessionCreateError = SessionCreateErrors[keyof SessionCreateErrors] @@ -3155,6 +3167,44 @@ export type SessionPromptResponses = { export type SessionPromptResponse = SessionPromptResponses[keyof SessionPromptResponses] +export type SessionMessageCountData = { + body?: never + path: { + /** + * Session ID + */ + sessionID: string + } + query?: { + directory?: string + } + url: "/session/{sessionID}/message/count" +} + +export type SessionMessageCountErrors = { + /** + * Bad request + */ + 400: BadRequestError + /** + * Not found + */ + 404: NotFoundError +} + +export type SessionMessageCountError = SessionMessageCountErrors[keyof SessionMessageCountErrors] + +export type SessionMessageCountResponses = { + /** + * Message count + */ + 200: { + count: number + } +} + +export type SessionMessageCountResponse = SessionMessageCountResponses[keyof SessionMessageCountResponses] + export type SessionDeleteMessageData = { body?: never path: { diff --git a/packages/sdk/openapi.json b/packages/sdk/openapi.json index 0f9c6f0203c..bf0537edf1c 100644 --- a/packages/sdk/openapi.json +++ b/packages/sdk/openapi.json @@ -1430,6 +1430,16 @@ } } } + }, + "409": { + "description": "Conflict", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DuplicateIDError" + } + } + } } }, "requestBody": { @@ -1438,6 +1448,10 @@ "schema": { "type": "object", "properties": { + "id": { + "type": "string", + "pattern": "^ses.*" + }, "parentID": { "type": "string", "pattern": "^ses.*" @@ -10288,6 +10302,25 @@ }, "required": ["name", "uri", "client"] }, + "DuplicateIDError": { + "type": "object", + "properties": { + "name": { + "type": "string", + "const": "DuplicateIDError" + }, + "data": { + "type": "object", + "properties": { + "id": { + "type": "string" + } + }, + "required": ["id"] + } + }, + "required": ["name", "data"] + }, "TextPartInput": { "type": "object", "properties": {