RooCodeInc · mrubens · Dec 19, 2025 · Dec 19, 2025 · Dec 19, 2025
@@ -237,6 +237,7 @@ const vertexSchema = apiModelIdProviderModelSchema.extend({
 	vertexRegion: z.string().optional(),
 	enableUrlContext: z.boolean().optional(),
 	enableGrounding: z.boolean().optional(),
+	vertex1MContext: z.boolean().optional(), // Enable 'context-1m-2025-08-07' beta for 1M context window.
 })
 
 const openAiSchema = baseProviderSettingsSchema.extend({

@@ -275,29 +275,49 @@ export const vertexModels = {
 	},
 	"claude-sonnet-4@20250514": {
 		maxTokens: 8192,
-		contextWindow: 200_000,
+		contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
 		supportsImages: true,
 		supportsPromptCache: true,
 		supportsNativeTools: true,
 		defaultToolProtocol: "native",
-		inputPrice: 3.0,
-		outputPrice: 15.0,
-		cacheWritesPrice: 3.75,
-		cacheReadsPrice: 0.3,
+		inputPrice: 3.0, // $3 per million input tokens (≤200K context)
+		outputPrice: 15.0, // $15 per million output tokens (≤200K context)
+		cacheWritesPrice: 3.75, // $3.75 per million tokens
+		cacheReadsPrice: 0.3, // $0.30 per million tokens
 		supportsReasoningBudget: true,
+		// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
+		tiers: [
+			{
+				contextWindow: 1_000_000, // 1M tokens with beta flag
+				inputPrice: 6.0, // $6 per million input tokens (>200K context)
+				outputPrice: 22.5, // $22.50 per million output tokens (>200K context)
+				cacheWritesPrice: 7.5, // $7.50 per million tokens (>200K context)
+				cacheReadsPrice: 0.6, // $0.60 per million tokens (>200K context)
+			},
+		],
 	},
 	"claude-sonnet-4-5@20250929": {
 		maxTokens: 8192,
-		contextWindow: 200_000,
+		contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
 		supportsImages: true,
 		supportsPromptCache: true,
 		supportsNativeTools: true,
 		defaultToolProtocol: "native",
-		inputPrice: 3.0,
-		outputPrice: 15.0,
-		cacheWritesPrice: 3.75,
-		cacheReadsPrice: 0.3,
+		inputPrice: 3.0, // $3 per million input tokens (≤200K context)
+		outputPrice: 15.0, // $15 per million output tokens (≤200K context)
+		cacheWritesPrice: 3.75, // $3.75 per million tokens
+		cacheReadsPrice: 0.3, // $0.30 per million tokens
 		supportsReasoningBudget: true,
+		// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
+		tiers: [
+			{
+				contextWindow: 1_000_000, // 1M tokens with beta flag
+				inputPrice: 6.0, // $6 per million input tokens (>200K context)
+				outputPrice: 22.5, // $22.50 per million output tokens (>200K context)
+				cacheWritesPrice: 7.5, // $7.50 per million tokens (>200K context)
+				cacheReadsPrice: 0.6, // $0.60 per million tokens (>200K context)
+			},
+		],
 	},
 	"claude-haiku-4-5@20251001": {
 		maxTokens: 8192,
@@ -517,6 +537,10 @@ export const vertexModels = {
 	},
 } as const satisfies Record<string, ModelInfo>
 
+// Vertex AI model IDs eligible for the opt-in 1M context window beta (see the `vertex1MContext` setting).
+// Uses the same beta header 'context-1m-2025-08-07' as Anthropic and Bedrock.
+export const VERTEX_1M_CONTEXT_MODEL_IDS = ["claude-sonnet-4@20250514", "claude-sonnet-4-5@20250929"] as const
+
 export const VERTEX_REGIONS = [
 	{ value: "global", label: "global" },
 	{ value: "us-central1", label: "us-central1" },

@@ -3,6 +3,8 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"
 
+import { VERTEX_1M_CONTEXT_MODEL_IDS } from "@roo-code/types"
+
 import { ApiStreamChunk } from "../../transform/stream"
 
 import { AnthropicVertexHandler } from "../anthropic-vertex"
@@ -159,35 +161,39 @@ describe("VertexHandler", () => {
 				outputTokens: 5,
 			})
 
-			expect(mockCreate).toHaveBeenCalledWith({
-				model: "claude-3-5-sonnet-v2@20241022",
-				max_tokens: 8192,
-				temperature: 0,
-				system: [
-					{
-						type: "text",
-						text: "You are a helpful assistant",
-						cache_control: { type: "ephemeral" },
-					},
-				],
-				messages: [
-					{
-						role: "user",
-						content: [
-							{
-								type: "text",
-								text: "Hello",
-								cache_control: { type: "ephemeral" },
-							},
-						],
-					},
-					{
-						role: "assistant",
-						content: "Hi there!",
-					},
-				],
-				stream: true,
-			})
+			expect(mockCreate).toHaveBeenCalledWith(
+				{
+					model: "claude-3-5-sonnet-v2@20241022",
+					max_tokens: 8192,
+					temperature: 0,
+					thinking: undefined,
+					system: [
+						{
+							type: "text",
+							text: "You are a helpful assistant",
+							cache_control: { type: "ephemeral" },
+						},
+					],
+					messages: [
+						{
+							role: "user",
+							content: [
+								{
+									type: "text",
+									text: "Hello",
+									cache_control: { type: "ephemeral" },
+								},
+							],
+						},
+						{
+							role: "assistant",
+							content: "Hi there!",
+						},
+					],
+					stream: true,
+				},
+				undefined,
+			)
 		})
 
 		it("should handle multiple content blocks with line breaks for Claude", async () => {
@@ -401,6 +407,7 @@ describe("VertexHandler", () => {
 						}),
 					],
 				}),
+				undefined,
 			)
 		})
 
@@ -858,6 +865,162 @@ describe("VertexHandler", () => {
 			expect(result.reasoningBudget).toBeUndefined()
 			expect(result.temperature).toBe(0)
 		})
+
+		it("should enable 1M context for Claude Sonnet 4 when beta flag is set", () => {
+			const handler = new AnthropicVertexHandler({
+				apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0], // "claude-sonnet-4@20250514"
+				vertexProjectId: "test-project",
+				vertexRegion: "us-central1",
+				vertex1MContext: true, // opt in to the 1M context beta
+			})
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(1_000_000) // 1M tier instead of the 200K default
+			expect(model.info.inputPrice).toBe(6.0) // 1M-tier input price
+			expect(model.info.outputPrice).toBe(22.5) // 1M-tier output price
+			expect(model.betas).toContain("context-1m-2025-08-07") // beta flag exposed on the model
+		})
+
+		it("should enable 1M context for Claude Sonnet 4.5 when beta flag is set", () => {
+			const handler = new AnthropicVertexHandler({
+				apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[1], // "claude-sonnet-4-5@20250929"
+				vertexProjectId: "test-project",
+				vertexRegion: "us-central1",
+				vertex1MContext: true, // opt in to the 1M context beta
+			})
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(1_000_000) // 1M tier instead of the 200K default
+			expect(model.info.inputPrice).toBe(6.0) // 1M-tier input price
+			expect(model.info.outputPrice).toBe(22.5) // 1M-tier output price
+			expect(model.betas).toContain("context-1m-2025-08-07") // beta flag exposed on the model
+		})
+
+		it("should not enable 1M context when flag is disabled", () => {
+			const handler = new AnthropicVertexHandler({
+				apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0], // a 1M-capable model ID
+				vertexProjectId: "test-project",
+				vertexRegion: "us-central1",
+				vertex1MContext: false, // explicitly opted out
+			})
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(200_000) // base context window
+			expect(model.info.inputPrice).toBe(3.0) // base input price
+			expect(model.info.outputPrice).toBe(15.0) // base output price
+			expect(model.betas).toBeUndefined() // no beta flags requested
+		})
+
+		it("should not enable 1M context for non-supported models even with flag", () => {
+			const handler = new AnthropicVertexHandler({
+				apiModelId: "claude-3-5-sonnet-v2@20241022", // not in VERTEX_1M_CONTEXT_MODEL_IDS
+				vertexProjectId: "test-project",
+				vertexRegion: "us-central1",
+				vertex1MContext: true, // flag is set but must be ignored for this model
+			})
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(200_000) // context window stays at 200K
+			expect(model.betas).toBeUndefined() // no beta flags requested
+		})
+	})
+
+	describe("1M context beta header", () => { // checks the second argument passed to messages.create
+		const mockMessages: Anthropic.Messages.MessageParam[] = [
+			{
+				role: "user",
+				content: "Hello",
+			},
+		]
+
+		const systemPrompt = "You are a helpful assistant"
+
+		it("should include anthropic-beta header when 1M context is enabled", async () => {
+			const handler = new AnthropicVertexHandler({
+				apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0], // "claude-sonnet-4@20250514"
+				vertexProjectId: "test-project",
+				vertexRegion: "us-central1",
+				vertex1MContext: true, // opt in to the 1M context beta
+			})
+
+			const mockStream = [ // single message_start chunk with usage counts
+				{
+					type: "message_start",
+					message: {
+						usage: {
+							input_tokens: 10,
+							output_tokens: 0,
+						},
+					},
+				},
+			]
+
+			const asyncIterator = { // async-iterable wrapper that yields the mock chunks in order
+				async *[Symbol.asyncIterator]() {
+					for (const chunk of mockStream) {
+						yield chunk
+					}
+				},
+			}
+
+			const mockCreate = vitest.fn().mockResolvedValue(asyncIterator)
+			;(handler["client"].messages as any).create = mockCreate // replace the client's create() with the mock
+
+			const stream = handler.createMessage(systemPrompt, mockMessages)
+
+			for await (const _chunk of stream) {
+				// Drain the stream; only the arguments passed to create() are asserted below
+			}
+
+			// Verify the API was called with the beta header
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.anything(), // request body is not inspected here
+				expect.objectContaining({
+					headers: { "anthropic-beta": "context-1m-2025-08-07" },
+				}),
+			)
+		})
+
+		it("should not include anthropic-beta header when 1M context is disabled", async () => {
+			const handler = new AnthropicVertexHandler({
+				apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0], // a 1M-capable model ID
+				vertexProjectId: "test-project",
+				vertexRegion: "us-central1",
+				vertex1MContext: false, // explicitly opted out
+			})
+
+			const mockStream = [ // single message_start chunk with usage counts
+				{
+					type: "message_start",
+					message: {
+						usage: {
+							input_tokens: 10,
+							output_tokens: 0,
+						},
+					},
+				},
+			]
+
+			const asyncIterator = { // async-iterable wrapper that yields the mock chunks in order
+				async *[Symbol.asyncIterator]() {
+					for (const chunk of mockStream) {
+						yield chunk
+					}
+				},
+			}
+
+			const mockCreate = vitest.fn().mockResolvedValue(asyncIterator)
+			;(handler["client"].messages as any).create = mockCreate // replace the client's create() with the mock
+
+			const stream = handler.createMessage(systemPrompt, mockMessages)
+
+			for await (const _chunk of stream) {
+				// Drain the stream; only the arguments passed to create() are asserted below
+			}
+
+			// Verify the API was called without the beta header
+			expect(mockCreate).toHaveBeenCalledWith(expect.anything(), undefined) // second argument is undefined
+		})
 	})
 
 	describe("thinking model configuration", () => {
@@ -946,6 +1109,7 @@ describe("VertexHandler", () => {
 					thinking: { type: "enabled", budget_tokens: 4096 },
 					temperature: 1.0, // Thinking requires temperature 1.0
 				}),
+				undefined,
 			)
 		})
 	})
@@ -1032,6 +1196,7 @@ describe("VertexHandler", () => {
 					]),
 					tool_choice: { type: "auto", disable_parallel_tool_use: true },
 				}),
+				undefined,
 			)
 		})
 
@@ -1080,6 +1245,7 @@ describe("VertexHandler", () => {
 				expect.not.objectContaining({
 					tools: expect.anything(),
 				}),
+				undefined,
 			)
 		})