Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/types/src/provider-settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ const vertexSchema = apiModelIdProviderModelSchema.extend({
vertexRegion: z.string().optional(),
enableUrlContext: z.boolean().optional(),
enableGrounding: z.boolean().optional(),
vertex1MContext: z.boolean().optional(), // Enable 'context-1m-2025-08-07' beta for 1M context window.
})

const openAiSchema = baseProviderSettingsSchema.extend({
Expand Down
44 changes: 34 additions & 10 deletions packages/types/src/providers/vertex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -275,29 +275,49 @@ export const vertexModels = {
},
"claude-sonnet-4@20250514": {
maxTokens: 8192,
contextWindow: 200_000,
contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
supportsImages: true,
supportsPromptCache: true,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 3.0,
outputPrice: 15.0,
cacheWritesPrice: 3.75,
cacheReadsPrice: 0.3,
inputPrice: 3.0, // $3 per million input tokens (≤200K context)
outputPrice: 15.0, // $15 per million output tokens (≤200K context)
cacheWritesPrice: 3.75, // $3.75 per million tokens
cacheReadsPrice: 0.3, // $0.30 per million tokens
supportsReasoningBudget: true,
// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
tiers: [
{
contextWindow: 1_000_000, // 1M tokens with beta flag
inputPrice: 6.0, // $6 per million input tokens (>200K context)
outputPrice: 22.5, // $22.50 per million output tokens (>200K context)
cacheWritesPrice: 7.5, // $7.50 per million tokens (>200K context)
cacheReadsPrice: 0.6, // $0.60 per million tokens (>200K context)
},
],
},
"claude-sonnet-4-5@20250929": {
maxTokens: 8192,
contextWindow: 200_000,
contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
supportsImages: true,
supportsPromptCache: true,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 3.0,
outputPrice: 15.0,
cacheWritesPrice: 3.75,
cacheReadsPrice: 0.3,
inputPrice: 3.0, // $3 per million input tokens (≤200K context)
outputPrice: 15.0, // $15 per million output tokens (≤200K context)
cacheWritesPrice: 3.75, // $3.75 per million tokens
cacheReadsPrice: 0.3, // $0.30 per million tokens
supportsReasoningBudget: true,
// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
tiers: [
{
contextWindow: 1_000_000, // 1M tokens with beta flag
inputPrice: 6.0, // $6 per million input tokens (>200K context)
outputPrice: 22.5, // $22.50 per million output tokens (>200K context)
cacheWritesPrice: 7.5, // $7.50 per million tokens (>200K context)
cacheReadsPrice: 0.6, // $0.60 per million tokens (>200K context)
},
],
},
"claude-haiku-4-5@20251001": {
maxTokens: 8192,
Expand Down Expand Up @@ -517,6 +537,10 @@ export const vertexModels = {
},
} as const satisfies Record<string, ModelInfo>

/**
 * Vertex AI model IDs that support the 1M context window beta.
 *
 * Uses the same beta header ('context-1m-2025-08-07') as Anthropic and Bedrock.
 * Declared `as const` so the list is a readonly tuple of literal model IDs.
 */
export const VERTEX_1M_CONTEXT_MODEL_IDS = ["claude-sonnet-4@20250514", "claude-sonnet-4-5@20250929"] as const

export const VERTEX_REGIONS = [
{ value: "global", label: "global" },
{ value: "us-central1", label: "us-central1" },
Expand Down
224 changes: 195 additions & 29 deletions src/api/providers/__tests__/anthropic-vertex.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"

import { VERTEX_1M_CONTEXT_MODEL_IDS } from "@roo-code/types"

import { ApiStreamChunk } from "../../transform/stream"

import { AnthropicVertexHandler } from "../anthropic-vertex"
Expand Down Expand Up @@ -159,35 +161,39 @@ describe("VertexHandler", () => {
outputTokens: 5,
})

expect(mockCreate).toHaveBeenCalledWith({
model: "claude-3-5-sonnet-v2@20241022",
max_tokens: 8192,
temperature: 0,
system: [
{
type: "text",
text: "You are a helpful assistant",
cache_control: { type: "ephemeral" },
},
],
messages: [
{
role: "user",
content: [
{
type: "text",
text: "Hello",
cache_control: { type: "ephemeral" },
},
],
},
{
role: "assistant",
content: "Hi there!",
},
],
stream: true,
})
expect(mockCreate).toHaveBeenCalledWith(
{
model: "claude-3-5-sonnet-v2@20241022",
max_tokens: 8192,
temperature: 0,
thinking: undefined,
system: [
{
type: "text",
text: "You are a helpful assistant",
cache_control: { type: "ephemeral" },
},
],
messages: [
{
role: "user",
content: [
{
type: "text",
text: "Hello",
cache_control: { type: "ephemeral" },
},
],
},
{
role: "assistant",
content: "Hi there!",
},
],
stream: true,
},
undefined,
)
})

it("should handle multiple content blocks with line breaks for Claude", async () => {
Expand Down Expand Up @@ -401,6 +407,7 @@ describe("VertexHandler", () => {
}),
],
}),
undefined,
)
})

Expand Down Expand Up @@ -858,6 +865,162 @@ describe("VertexHandler", () => {
expect(result.reasoningBudget).toBeUndefined()
expect(result.temperature).toBe(0)
})

it("should enable 1M context for Claude Sonnet 4 when beta flag is set", () => {
	// First entry of the 1M-capable list, with the opt-in setting switched on.
	const { info, betas } = new AnthropicVertexHandler({
		apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0],
		vertexProjectId: "test-project",
		vertexRegion: "us-central1",
		vertex1MContext: true,
	}).getModel()

	// The resolved model info must carry the extended window and its pricing.
	expect(info.contextWindow).toBe(1_000_000)
	expect(info.inputPrice).toBe(6.0)
	expect(info.outputPrice).toBe(22.5)

	// The beta identifier must be surfaced alongside the model.
	expect(betas).toContain("context-1m-2025-08-07")
})

it("should enable 1M context for Claude Sonnet 4.5 when beta flag is set", () => {
	// Second entry of the 1M-capable list, with the opt-in setting switched on.
	const { info, betas } = new AnthropicVertexHandler({
		apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[1],
		vertexProjectId: "test-project",
		vertexRegion: "us-central1",
		vertex1MContext: true,
	}).getModel()

	// The resolved model info must carry the extended window and its pricing.
	expect(info.contextWindow).toBe(1_000_000)
	expect(info.inputPrice).toBe(6.0)
	expect(info.outputPrice).toBe(22.5)

	// The beta identifier must be surfaced alongside the model.
	expect(betas).toContain("context-1m-2025-08-07")
})

it("should not enable 1M context when flag is disabled", () => {
	// A 1M-capable model, but the setting is explicitly turned off.
	const { info, betas } = new AnthropicVertexHandler({
		apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0],
		vertexProjectId: "test-project",
		vertexRegion: "us-central1",
		vertex1MContext: false,
	}).getModel()

	// Default window and base pricing stay in effect.
	expect(info.contextWindow).toBe(200_000)
	expect(info.inputPrice).toBe(3.0)
	expect(info.outputPrice).toBe(15.0)

	// No beta identifiers are attached.
	expect(betas).toBeUndefined()
})

it("should not enable 1M context for non-supported models even with flag", () => {
	// This model ID is not in VERTEX_1M_CONTEXT_MODEL_IDS, so the flag must have no effect.
	const { info, betas } = new AnthropicVertexHandler({
		apiModelId: "claude-3-5-sonnet-v2@20241022",
		vertexProjectId: "test-project",
		vertexRegion: "us-central1",
		vertex1MContext: true,
	}).getModel()

	expect(info.contextWindow).toBe(200_000)
	expect(betas).toBeUndefined()
})
})

describe("1M context beta header", () => {
	const systemPrompt = "You are a helpful assistant"

	const mockMessages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]

	/**
	 * Shared scenario for both cases below: builds a handler for the first
	 * 1M-capable model with the given `vertex1MContext` setting, replaces the
	 * client's `messages.create` with a mock resolving to a single-chunk
	 * stream, drains `createMessage`, and returns the mock for assertions.
	 */
	const drainWithMockedCreate = async (vertex1MContext: boolean) => {
		const handler = new AnthropicVertexHandler({
			apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0],
			vertexProjectId: "test-project",
			vertexRegion: "us-central1",
			vertex1MContext,
		})

		const chunks = [
			{
				type: "message_start",
				message: {
					usage: {
						input_tokens: 10,
						output_tokens: 0,
					},
				},
			},
		]

		const mockCreate = vitest.fn().mockResolvedValue({
			async *[Symbol.asyncIterator]() {
				yield* chunks
			},
		})
		;(handler["client"].messages as any).create = mockCreate

		for await (const _chunk of handler.createMessage(systemPrompt, mockMessages)) {
			// Just consume
		}

		return mockCreate
	}

	it("should include anthropic-beta header when 1M context is enabled", async () => {
		const mockCreate = await drainWithMockedCreate(true)

		// Verify the API was called with the beta header
		expect(mockCreate).toHaveBeenCalledWith(
			expect.anything(),
			expect.objectContaining({
				headers: { "anthropic-beta": "context-1m-2025-08-07" },
			}),
		)
	})

	it("should not include anthropic-beta header when 1M context is disabled", async () => {
		const mockCreate = await drainWithMockedCreate(false)

		// Verify the API was called without the beta header
		expect(mockCreate).toHaveBeenCalledWith(expect.anything(), undefined)
	})
})

describe("thinking model configuration", () => {
Expand Down Expand Up @@ -946,6 +1109,7 @@ describe("VertexHandler", () => {
thinking: { type: "enabled", budget_tokens: 4096 },
temperature: 1.0, // Thinking requires temperature 1.0
}),
undefined,
)
})
})
Expand Down Expand Up @@ -1032,6 +1196,7 @@ describe("VertexHandler", () => {
]),
tool_choice: { type: "auto", disable_parallel_tool_use: true },
}),
undefined,
)
})

Expand Down Expand Up @@ -1080,6 +1245,7 @@ describe("VertexHandler", () => {
expect.not.objectContaining({
tools: expect.anything(),
}),
undefined,
)
})

Expand Down
Loading
Loading