diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts index 024293ddafc..443cbafcf16 100644 --- a/packages/types/src/provider-settings.ts +++ b/packages/types/src/provider-settings.ts @@ -237,6 +237,7 @@ const vertexSchema = apiModelIdProviderModelSchema.extend({ vertexRegion: z.string().optional(), enableUrlContext: z.boolean().optional(), enableGrounding: z.boolean().optional(), + vertex1MContext: z.boolean().optional(), // Enable 'context-1m-2025-08-07' beta for 1M context window. }) const openAiSchema = baseProviderSettingsSchema.extend({ diff --git a/packages/types/src/providers/vertex.ts b/packages/types/src/providers/vertex.ts index 916d72afe06..cd247efb41a 100644 --- a/packages/types/src/providers/vertex.ts +++ b/packages/types/src/providers/vertex.ts @@ -275,29 +275,49 @@ export const vertexModels = { }, "claude-sonnet-4@20250514": { maxTokens: 8192, - contextWindow: 200_000, + contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07' supportsImages: true, supportsPromptCache: true, supportsNativeTools: true, defaultToolProtocol: "native", - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, + inputPrice: 3.0, // $3 per million input tokens (≤200K context) + outputPrice: 15.0, // $15 per million output tokens (≤200K context) + cacheWritesPrice: 3.75, // $3.75 per million tokens + cacheReadsPrice: 0.3, // $0.30 per million tokens supportsReasoningBudget: true, + // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07') + tiers: [ + { + contextWindow: 1_000_000, // 1M tokens with beta flag + inputPrice: 6.0, // $6 per million input tokens (>200K context) + outputPrice: 22.5, // $22.50 per million output tokens (>200K context) + cacheWritesPrice: 7.5, // $7.50 per million tokens (>200K context) + cacheReadsPrice: 0.6, // $0.60 per million tokens (>200K context) + }, + ], }, "claude-sonnet-4-5@20250929": { maxTokens: 
8192, - contextWindow: 200_000, + contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07' supportsImages: true, supportsPromptCache: true, supportsNativeTools: true, defaultToolProtocol: "native", - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, + inputPrice: 3.0, // $3 per million input tokens (≤200K context) + outputPrice: 15.0, // $15 per million output tokens (≤200K context) + cacheWritesPrice: 3.75, // $3.75 per million tokens + cacheReadsPrice: 0.3, // $0.30 per million tokens supportsReasoningBudget: true, + // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07') + tiers: [ + { + contextWindow: 1_000_000, // 1M tokens with beta flag + inputPrice: 6.0, // $6 per million input tokens (>200K context) + outputPrice: 22.5, // $22.50 per million output tokens (>200K context) + cacheWritesPrice: 7.5, // $7.50 per million tokens (>200K context) + cacheReadsPrice: 0.6, // $0.60 per million tokens (>200K context) + }, + ], }, "claude-haiku-4-5@20251001": { maxTokens: 8192, @@ -517,6 +537,10 @@ export const vertexModels = { }, } as const satisfies Record +// Vertex AI models that support 1M context window beta +// Uses the same beta header 'context-1m-2025-08-07' as Anthropic and Bedrock +export const VERTEX_1M_CONTEXT_MODEL_IDS = ["claude-sonnet-4@20250514", "claude-sonnet-4-5@20250929"] as const + export const VERTEX_REGIONS = [ { value: "global", label: "global" }, { value: "us-central1", label: "us-central1" }, diff --git a/src/api/providers/__tests__/anthropic-vertex.spec.ts b/src/api/providers/__tests__/anthropic-vertex.spec.ts index a5e97ed6f3e..0746602d7ef 100644 --- a/src/api/providers/__tests__/anthropic-vertex.spec.ts +++ b/src/api/providers/__tests__/anthropic-vertex.spec.ts @@ -3,6 +3,8 @@ import { Anthropic } from "@anthropic-ai/sdk" import { AnthropicVertex } from "@anthropic-ai/vertex-sdk" +import { VERTEX_1M_CONTEXT_MODEL_IDS } from 
"@roo-code/types" + import { ApiStreamChunk } from "../../transform/stream" import { AnthropicVertexHandler } from "../anthropic-vertex" @@ -159,35 +161,39 @@ describe("VertexHandler", () => { outputTokens: 5, }) - expect(mockCreate).toHaveBeenCalledWith({ - model: "claude-3-5-sonnet-v2@20241022", - max_tokens: 8192, - temperature: 0, - system: [ - { - type: "text", - text: "You are a helpful assistant", - cache_control: { type: "ephemeral" }, - }, - ], - messages: [ - { - role: "user", - content: [ - { - type: "text", - text: "Hello", - cache_control: { type: "ephemeral" }, - }, - ], - }, - { - role: "assistant", - content: "Hi there!", - }, - ], - stream: true, - }) + expect(mockCreate).toHaveBeenCalledWith( + { + model: "claude-3-5-sonnet-v2@20241022", + max_tokens: 8192, + temperature: 0, + thinking: undefined, + system: [ + { + type: "text", + text: "You are a helpful assistant", + cache_control: { type: "ephemeral" }, + }, + ], + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "Hello", + cache_control: { type: "ephemeral" }, + }, + ], + }, + { + role: "assistant", + content: "Hi there!", + }, + ], + stream: true, + }, + undefined, + ) }) it("should handle multiple content blocks with line breaks for Claude", async () => { @@ -401,6 +407,7 @@ describe("VertexHandler", () => { }), ], }), + undefined, ) }) @@ -858,6 +865,162 @@ describe("VertexHandler", () => { expect(result.reasoningBudget).toBeUndefined() expect(result.temperature).toBe(0) }) + + it("should enable 1M context for Claude Sonnet 4 when beta flag is set", () => { + const handler = new AnthropicVertexHandler({ + apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0], + vertexProjectId: "test-project", + vertexRegion: "us-central1", + vertex1MContext: true, + }) + + const model = handler.getModel() + expect(model.info.contextWindow).toBe(1_000_000) + expect(model.info.inputPrice).toBe(6.0) + expect(model.info.outputPrice).toBe(22.5) + 
expect(model.betas).toContain("context-1m-2025-08-07") + }) + + it("should enable 1M context for Claude Sonnet 4.5 when beta flag is set", () => { + const handler = new AnthropicVertexHandler({ + apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[1], + vertexProjectId: "test-project", + vertexRegion: "us-central1", + vertex1MContext: true, + }) + + const model = handler.getModel() + expect(model.info.contextWindow).toBe(1_000_000) + expect(model.info.inputPrice).toBe(6.0) + expect(model.info.outputPrice).toBe(22.5) + expect(model.betas).toContain("context-1m-2025-08-07") + }) + + it("should not enable 1M context when flag is disabled", () => { + const handler = new AnthropicVertexHandler({ + apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0], + vertexProjectId: "test-project", + vertexRegion: "us-central1", + vertex1MContext: false, + }) + + const model = handler.getModel() + expect(model.info.contextWindow).toBe(200_000) + expect(model.info.inputPrice).toBe(3.0) + expect(model.info.outputPrice).toBe(15.0) + expect(model.betas).toBeUndefined() + }) + + it("should not enable 1M context for non-supported models even with flag", () => { + const handler = new AnthropicVertexHandler({ + apiModelId: "claude-3-5-sonnet-v2@20241022", + vertexProjectId: "test-project", + vertexRegion: "us-central1", + vertex1MContext: true, + }) + + const model = handler.getModel() + expect(model.info.contextWindow).toBe(200_000) + expect(model.betas).toBeUndefined() + }) + }) + + describe("1M context beta header", () => { + const mockMessages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: "Hello", + }, + ] + + const systemPrompt = "You are a helpful assistant" + + it("should include anthropic-beta header when 1M context is enabled", async () => { + const handler = new AnthropicVertexHandler({ + apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0], + vertexProjectId: "test-project", + vertexRegion: "us-central1", + vertex1MContext: true, + }) + + const mockStream = [ + { + type: "message_start", 
+ message: { + usage: { + input_tokens: 10, + output_tokens: 0, + }, + }, + }, + ] + + const asyncIterator = { + async *[Symbol.asyncIterator]() { + for (const chunk of mockStream) { + yield chunk + } + }, + } + + const mockCreate = vitest.fn().mockResolvedValue(asyncIterator) + ;(handler["client"].messages as any).create = mockCreate + + const stream = handler.createMessage(systemPrompt, mockMessages) + + for await (const _chunk of stream) { + // Just consume + } + + // Verify the API was called with the beta header + expect(mockCreate).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ + headers: { "anthropic-beta": "context-1m-2025-08-07" }, + }), + ) + }) + + it("should not include anthropic-beta header when 1M context is disabled", async () => { + const handler = new AnthropicVertexHandler({ + apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0], + vertexProjectId: "test-project", + vertexRegion: "us-central1", + vertex1MContext: false, + }) + + const mockStream = [ + { + type: "message_start", + message: { + usage: { + input_tokens: 10, + output_tokens: 0, + }, + }, + }, + ] + + const asyncIterator = { + async *[Symbol.asyncIterator]() { + for (const chunk of mockStream) { + yield chunk + } + }, + } + + const mockCreate = vitest.fn().mockResolvedValue(asyncIterator) + ;(handler["client"].messages as any).create = mockCreate + + const stream = handler.createMessage(systemPrompt, mockMessages) + + for await (const _chunk of stream) { + // Just consume + } + + // Verify the API was called without the beta header + expect(mockCreate).toHaveBeenCalledWith(expect.anything(), undefined) + }) }) describe("thinking model configuration", () => { @@ -946,6 +1109,7 @@ describe("VertexHandler", () => { thinking: { type: "enabled", budget_tokens: 4096 }, temperature: 1.0, // Thinking requires temperature 1.0 }), + undefined, ) }) }) @@ -1032,6 +1196,7 @@ describe("VertexHandler", () => { ]), tool_choice: { type: "auto", disable_parallel_tool_use: true }, }), + 
undefined, ) }) @@ -1080,6 +1245,7 @@ describe("VertexHandler", () => { expect.not.objectContaining({ tools: expect.anything(), }), + undefined, ) }) diff --git a/src/api/providers/anthropic-vertex.ts b/src/api/providers/anthropic-vertex.ts index 6d2d93f7f55..cbfae08f41e 100644 --- a/src/api/providers/anthropic-vertex.ts +++ b/src/api/providers/anthropic-vertex.ts @@ -9,6 +9,7 @@ import { vertexModels, ANTHROPIC_DEFAULT_MAX_TOKENS, TOOL_PROTOCOL, + VERTEX_1M_CONTEXT_MODEL_IDS, } from "@roo-code/types" import { ApiHandlerOptions } from "../../shared/api" @@ -69,7 +70,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { - let { id, info, temperature, maxTokens, reasoning: thinking } = this.getModel() + let { id, info, temperature, maxTokens, reasoning: thinking, betas } = this.getModel() const { supportsPromptCache } = info @@ -120,7 +121,10 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple ...nativeToolParams, } - const stream = await this.client.messages.create(params) + // Pass any enabled beta flags (e.g. the 1M-context beta) as per-request options so the SDK sends the 'anthropic-beta' header + const requestOptions = betas?.length ? { headers: { "anthropic-beta": betas.join(",") } } : undefined + + const stream = await this.client.messages.create(params, requestOptions) for await (const chunk of stream) { switch (chunk.type) { @@ -218,14 +222,49 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple getModel() { const modelId = this.options.apiModelId let id = modelId && modelId in vertexModels ?
(modelId as VertexModelId) : vertexDefaultModelId - const info: ModelInfo = vertexModels[id] + let info: ModelInfo = vertexModels[id] + + // Check if 1M context beta should be enabled for supported models + const supports1MContext = VERTEX_1M_CONTEXT_MODEL_IDS.includes( + id as (typeof VERTEX_1M_CONTEXT_MODEL_IDS)[number], + ) + const enable1MContext = supports1MContext && this.options.vertex1MContext + + // If 1M context beta is enabled, update the model info with tier pricing + if (enable1MContext) { + const tier = info.tiers?.[0] + if (tier) { + info = { + ...info, + contextWindow: tier.contextWindow, + inputPrice: tier.inputPrice, + outputPrice: tier.outputPrice, + cacheWritesPrice: tier.cacheWritesPrice, + cacheReadsPrice: tier.cacheReadsPrice, + } + } + } + const params = getModelParams({ format: "anthropic", modelId: id, model: info, settings: this.options }) + // Build betas array for request headers + const betas: string[] = [] + + // Add 1M context beta flag if enabled for supported models + if (enable1MContext) { + betas.push("context-1m-2025-08-07") + } + // The `:thinking` suffix indicates that the model is a "Hybrid" // reasoning model and that reasoning is required to be enabled. // The actual model ID honored by Anthropic's API does not have this // suffix. - return { id: id.endsWith(":thinking") ? id.replace(":thinking", "") : id, info, ...params } + return { + id: id.endsWith(":thinking") ? id.replace(":thinking", "") : id, + info, + betas: betas.length > 0 ? 
betas : undefined, + ...params, + } } async completePrompt(prompt: string) { diff --git a/webview-ui/src/components/settings/providers/Vertex.tsx b/webview-ui/src/components/settings/providers/Vertex.tsx index 57e82bad42b..db1cb23dae1 100644 --- a/webview-ui/src/components/settings/providers/Vertex.tsx +++ b/webview-ui/src/components/settings/providers/Vertex.tsx @@ -2,7 +2,7 @@ import { useCallback } from "react" import { Checkbox } from "vscrui" import { VSCodeLink, VSCodeTextField } from "@vscode/webview-ui-toolkit/react" -import { type ProviderSettings, VERTEX_REGIONS } from "@roo-code/types" +import { type ProviderSettings, VERTEX_REGIONS, VERTEX_1M_CONTEXT_MODEL_IDS } from "@roo-code/types" import { useAppTranslation } from "@src/i18n/TranslationContext" import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@src/components/ui" @@ -18,6 +18,13 @@ type VertexProps = { export const Vertex = ({ apiConfiguration, setApiConfigurationField, simplifySettings }: VertexProps) => { const { t } = useAppTranslation() + // Check if the selected model supports 1M context (Claude Sonnet 4 / 4.5) + const supports1MContextBeta = + !!apiConfiguration?.apiModelId && + VERTEX_1M_CONTEXT_MODEL_IDS.includes( + apiConfiguration.apiModelId as (typeof VERTEX_1M_CONTEXT_MODEL_IDS)[number], + ) + const handleInputChange = useCallback( ( field: K, @@ -94,6 +101,22 @@ export const Vertex = ({ apiConfiguration, setApiConfigurationField, simplifySet + {supports1MContextBeta && ( +
+				<div>
+					<Checkbox
+						checked={apiConfiguration?.vertex1MContext ?? false}
+						onChange={(checked: boolean) => {
+							setApiConfigurationField("vertex1MContext", checked)
+						}}>
+						{t("settings:providers.vertex1MContextBetaLabel")}
+					</Checkbox>
+					<div className="text-sm text-vscode-descriptionForeground">
+						{t("settings:providers.vertex1MContextBetaDescription")}
+					</div>
+				</div>
+			)}
+
 			{!simplifySettings && apiConfiguration.apiModelId?.startsWith("gemini") && (