1 change: 1 addition & 0 deletions .tmp/review/Roo-Code
Submodule Roo-Code added at 8dbd8c
1 change: 1 addition & 0 deletions .work/pr-8373/Roo-Code
Submodule Roo-Code added at b832e9
1 change: 1 addition & 0 deletions .work/reviews/Roo-Code
Submodule Roo-Code added at ea8420
195 changes: 195 additions & 0 deletions src/api/providers/__tests__/bedrock-temperature-top-p.spec.ts
@@ -0,0 +1,195 @@
// npx vitest run src/api/providers/__tests__/bedrock-temperature-top-p.spec.ts

import { vi, describe, it, expect, beforeEach } from "vitest"
import { AwsBedrockHandler } from "../bedrock"
import { ConverseStreamCommand, ConverseCommand } from "@aws-sdk/client-bedrock-runtime"
import type { Anthropic } from "@anthropic-ai/sdk"

// Mock AWS SDK credential providers
vi.mock("@aws-sdk/credential-providers", () => ({
fromIni: vi.fn().mockReturnValue({
accessKeyId: "test-access-key",
secretAccessKey: "test-secret-key",
}),
}))

// Mock BedrockRuntimeClient
vi.mock("@aws-sdk/client-bedrock-runtime", () => {
const mockSend = vi.fn().mockResolvedValue({
stream: [],
output: {
message: {
content: [{ text: "Test response" }],
},
},
})
const mockConverseStreamCommand = vi.fn()
const mockConverseCommand = vi.fn()

return {
BedrockRuntimeClient: vi.fn().mockImplementation(() => ({
send: mockSend,
})),
ConverseStreamCommand: mockConverseStreamCommand,
ConverseCommand: mockConverseCommand,
}
})

const mockConverseStreamCommand = vi.mocked(ConverseStreamCommand)
const mockConverseCommand = vi.mocked(ConverseCommand)

describe("Bedrock temperature and topP handling", () => {
let handler: AwsBedrockHandler

beforeEach(() => {
vi.clearAllMocks()
})

describe("Claude Sonnet 4.5 (3.5 v2) model", () => {
beforeEach(() => {
handler = new AwsBedrockHandler({
apiModelId: "anthropic.claude-sonnet-4-5-20250929-v1:0",
awsAccessKey: "test-access-key",
awsSecretKey: "test-secret-key",
awsRegion: "us-east-1",
modelTemperature: 0.7,
})
})

it("should only send temperature and not topP in createMessage when thinking is disabled", async () => {
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: "Test message",
},
]

const generator = handler.createMessage("", messages)
await generator.next()

expect(mockConverseStreamCommand).toHaveBeenCalled()
const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any

// Should have temperature but not topP
expect(commandArg.inferenceConfig).toBeDefined()
expect(commandArg.inferenceConfig.temperature).toBe(0.7)
expect(commandArg.inferenceConfig.topP).toBeUndefined()
})

it("should only send temperature and not topP in completePrompt", async () => {
await handler.completePrompt("Test prompt")

expect(mockConverseCommand).toHaveBeenCalled()
const commandArg = mockConverseCommand.mock.calls[0][0] as any

// Should have temperature but not topP
expect(commandArg.inferenceConfig).toBeDefined()
expect(commandArg.inferenceConfig.temperature).toBe(0.7)
expect(commandArg.inferenceConfig.topP).toBeUndefined()
})
})

describe("Other Bedrock models", () => {
beforeEach(() => {
handler = new AwsBedrockHandler({
apiModelId: "anthropic.claude-3-5-sonnet-20241022-v2:0",
awsAccessKey: "test-access-key",
awsSecretKey: "test-secret-key",
awsRegion: "us-east-1",
modelTemperature: 0.7,
})
})

it("should only send temperature and not topP for Claude 3.5 Sonnet", async () => {
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: "Test message",
},
]

const generator = handler.createMessage("", messages)
await generator.next()

expect(mockConverseStreamCommand).toHaveBeenCalled()
const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any

// Should have temperature but not topP
expect(commandArg.inferenceConfig).toBeDefined()
expect(commandArg.inferenceConfig.temperature).toBe(0.7)
expect(commandArg.inferenceConfig.topP).toBeUndefined()
})
})

describe("Models with thinking enabled", () => {
beforeEach(() => {
handler = new AwsBedrockHandler({
apiModelId: "anthropic.claude-3-7-sonnet-20250219-v1:0",
awsAccessKey: "test-access-key",
awsSecretKey: "test-secret-key",
awsRegion: "us-east-1",
modelTemperature: 0.7,
enableReasoningEffort: true,
})
})

it("should only send temperature when thinking is enabled", async () => {
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: "Test message",
},
]

const metadata = {
taskId: "test-task-id",
thinking: {
enabled: true,
maxThinkingTokens: 4096,
},
}

const generator = handler.createMessage("", messages, metadata as any)
await generator.next()

expect(mockConverseStreamCommand).toHaveBeenCalled()
const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any

// Should have temperature but not topP when thinking is enabled
expect(commandArg.inferenceConfig).toBeDefined()
// Temperature is overridden to 1.0 when reasoning is enabled
expect(commandArg.inferenceConfig.temperature).toBe(1.0)
expect(commandArg.inferenceConfig.topP).toBeUndefined()
})
})

describe("Default temperature handling", () => {
it("should use default temperature when not specified", async () => {
handler = new AwsBedrockHandler({
apiModelId: "anthropic.claude-sonnet-4-5-20250929-v1:0",
awsAccessKey: "test-access-key",
awsSecretKey: "test-secret-key",
awsRegion: "us-east-1",
// No modelTemperature specified
})

const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: "Test message",
},
]

const generator = handler.createMessage("", messages)
await generator.next()

expect(mockConverseStreamCommand).toHaveBeenCalled()
const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any

// Should have default temperature (0.3) but not topP
expect(commandArg.inferenceConfig).toBeDefined()
expect(commandArg.inferenceConfig.temperature).toBe(0.3) // BEDROCK_DEFAULT_TEMPERATURE
Contributor Author:
P2: The test hardcodes 0.3 as the default temperature. To reduce brittleness if defaults change, consider centralizing the default in a shared constant in source and importing it here (or deriving it from the provider), rather than duplicating the literal.
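A minimal sketch of that suggestion applied to the assertion above, assuming BEDROCK_DEFAULT_TEMPERATURE were exported from the provider module (the export name and import path are assumptions, not confirmed by this diff):

```ts
// Hypothetical: assumes the provider exports its default temperature (not shown in this PR).
import { BEDROCK_DEFAULT_TEMPERATURE } from "../bedrock"

// The assertion then tracks the source of truth instead of duplicating the literal 0.3.
expect(commandArg.inferenceConfig.temperature).toBe(BEDROCK_DEFAULT_TEMPERATURE)
```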

expect(commandArg.inferenceConfig.topP).toBeUndefined()
})
})
})
9 changes: 5 additions & 4 deletions src/api/providers/bedrock.ts
@@ -374,9 +374,9 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
}

if (!thinkingEnabled) {
inferenceConfig.topP = 0.1
}
// AWS Bedrock doesn't allow both temperature and topP to be specified for certain models
Contributor Author:
P2: Nice fix. Consider clarifying in a short comment whether "skip topP" is intended as a permanent rule across all Bedrock models, not just Anthropic/Claude. This helps future maintainers avoid reintroducing it in other call paths.

// When thinking is not enabled and we would normally set topP, we'll skip it to avoid the error
// This maintains the existing behavior while being compatible with Bedrock's requirements

// Check if 1M context is enabled for Claude Sonnet 4
// Use parseBaseModelId to handle cross-region inference prefixes
@@ -647,7 +647,8 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
const inferenceConfig: BedrockInferenceConfig = {
maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
...(thinkingEnabled ? {} : { topP: 0.1 }), // Only set topP when thinking is NOT enabled
// AWS Bedrock doesn't allow both temperature and topP to be specified for certain models
Contributor Author:
P2: Since we no longer set topP anywhere for Bedrock, a brief note here like "Bedrock requires choosing between temperature and topP; we standardize on temperature" would make the policy explicit at both call sites.
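One way to make that policy explicit at both call sites would be a tiny shared helper; a rough sketch under stated assumptions (the helper name, interface, and default value are hypothetical, not part of this PR):

```ts
// Hypothetical helper, not in this PR: Bedrock rejects requests that set both temperature and topP
// for some models, so we standardize on temperature and never set topP for Bedrock.
interface InferenceConfig {
	maxTokens: number
	temperature: number
	// deliberately no topP
}

function buildBedrockInferenceConfig(maxTokens: number, temperature: number | undefined, fallback = 0.3): InferenceConfig {
	return {
		maxTokens,
		// Fall back to the provider default when the caller didn't set a temperature.
		temperature: temperature ?? fallback,
	}
}
```

Both createMessage and completePrompt could then call this helper, so the "temperature only" rule lives in one place.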

// We'll only use temperature to avoid the "cannot both be specified" error
}

// For completePrompt, use a unique conversation ID based on the prompt