-
Notifications
You must be signed in to change notification settings - Fork 2.5k
fix: remove topP parameter for Bedrock models to fix Sonnet 4.5 compatibility #8378
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,195 @@ | ||
| // npx vitest run src/api/providers/__tests__/bedrock-temperature-top-p.spec.ts | ||
|
|
||
| import { vi, describe, it, expect, beforeEach } from "vitest" | ||
| import { AwsBedrockHandler } from "../bedrock" | ||
| import { ConverseStreamCommand, ConverseCommand } from "@aws-sdk/client-bedrock-runtime" | ||
| import type { Anthropic } from "@anthropic-ai/sdk" | ||
|
|
||
| // Mock AWS SDK credential providers | ||
| vi.mock("@aws-sdk/credential-providers", () => ({ | ||
| fromIni: vi.fn().mockReturnValue({ | ||
| accessKeyId: "test-access-key", | ||
| secretAccessKey: "test-secret-key", | ||
| }), | ||
| })) | ||
|
|
||
| // Mock BedrockRuntimeClient | ||
| vi.mock("@aws-sdk/client-bedrock-runtime", () => { | ||
| const mockSend = vi.fn().mockResolvedValue({ | ||
| stream: [], | ||
| output: { | ||
| message: { | ||
| content: [{ text: "Test response" }], | ||
| }, | ||
| }, | ||
| }) | ||
| const mockConverseStreamCommand = vi.fn() | ||
| const mockConverseCommand = vi.fn() | ||
|
|
||
| return { | ||
| BedrockRuntimeClient: vi.fn().mockImplementation(() => ({ | ||
| send: mockSend, | ||
| })), | ||
| ConverseStreamCommand: mockConverseStreamCommand, | ||
| ConverseCommand: mockConverseCommand, | ||
| } | ||
| }) | ||
|
|
||
| const mockConverseStreamCommand = vi.mocked(ConverseStreamCommand) | ||
| const mockConverseCommand = vi.mocked(ConverseCommand) | ||
|
|
||
| describe("Bedrock temperature and topP handling", () => { | ||
| let handler: AwsBedrockHandler | ||
|
|
||
| beforeEach(() => { | ||
| vi.clearAllMocks() | ||
| }) | ||
|
|
||
| describe("Claude Sonnet 4.5 (3.5 v2) model", () => { | ||
| beforeEach(() => { | ||
| handler = new AwsBedrockHandler({ | ||
| apiModelId: "anthropic.claude-sonnet-4-5-20250929-v1:0", | ||
| awsAccessKey: "test-access-key", | ||
| awsSecretKey: "test-secret-key", | ||
| awsRegion: "us-east-1", | ||
| modelTemperature: 0.7, | ||
| }) | ||
| }) | ||
|
|
||
| it("should only send temperature and not topP in createMessage when thinking is disabled", async () => { | ||
| const messages: Anthropic.Messages.MessageParam[] = [ | ||
| { | ||
| role: "user", | ||
| content: "Test message", | ||
| }, | ||
| ] | ||
|
|
||
| const generator = handler.createMessage("", messages) | ||
| await generator.next() | ||
|
|
||
| expect(mockConverseStreamCommand).toHaveBeenCalled() | ||
| const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any | ||
|
|
||
| // Should have temperature but not topP | ||
| expect(commandArg.inferenceConfig).toBeDefined() | ||
| expect(commandArg.inferenceConfig.temperature).toBe(0.7) | ||
| expect(commandArg.inferenceConfig.topP).toBeUndefined() | ||
| }) | ||
|
|
||
| it("should only send temperature and not topP in completePrompt", async () => { | ||
| await handler.completePrompt("Test prompt") | ||
|
|
||
| expect(mockConverseCommand).toHaveBeenCalled() | ||
| const commandArg = mockConverseCommand.mock.calls[0][0] as any | ||
|
|
||
| // Should have temperature but not topP | ||
| expect(commandArg.inferenceConfig).toBeDefined() | ||
| expect(commandArg.inferenceConfig.temperature).toBe(0.7) | ||
| expect(commandArg.inferenceConfig.topP).toBeUndefined() | ||
| }) | ||
| }) | ||
|
|
||
| describe("Other Bedrock models", () => { | ||
| beforeEach(() => { | ||
| handler = new AwsBedrockHandler({ | ||
| apiModelId: "anthropic.claude-3-5-sonnet-20241022-v2:0", | ||
| awsAccessKey: "test-access-key", | ||
| awsSecretKey: "test-secret-key", | ||
| awsRegion: "us-east-1", | ||
| modelTemperature: 0.7, | ||
| }) | ||
| }) | ||
|
|
||
| it("should only send temperature and not topP for Claude 3.5 Sonnet", async () => { | ||
| const messages: Anthropic.Messages.MessageParam[] = [ | ||
| { | ||
| role: "user", | ||
| content: "Test message", | ||
| }, | ||
| ] | ||
|
|
||
| const generator = handler.createMessage("", messages) | ||
| await generator.next() | ||
|
|
||
| expect(mockConverseStreamCommand).toHaveBeenCalled() | ||
| const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any | ||
|
|
||
| // Should have temperature but not topP | ||
| expect(commandArg.inferenceConfig).toBeDefined() | ||
| expect(commandArg.inferenceConfig.temperature).toBe(0.7) | ||
| expect(commandArg.inferenceConfig.topP).toBeUndefined() | ||
| }) | ||
| }) | ||
|
|
||
| describe("Models with thinking enabled", () => { | ||
| beforeEach(() => { | ||
| handler = new AwsBedrockHandler({ | ||
| apiModelId: "anthropic.claude-3-7-sonnet-20250219-v1:0", | ||
| awsAccessKey: "test-access-key", | ||
| awsSecretKey: "test-secret-key", | ||
| awsRegion: "us-east-1", | ||
| modelTemperature: 0.7, | ||
| enableReasoningEffort: true, | ||
| }) | ||
| }) | ||
|
|
||
| it("should only send temperature when thinking is enabled", async () => { | ||
| const messages: Anthropic.Messages.MessageParam[] = [ | ||
| { | ||
| role: "user", | ||
| content: "Test message", | ||
| }, | ||
| ] | ||
|
|
||
| const metadata = { | ||
| taskId: "test-task-id", | ||
| thinking: { | ||
| enabled: true, | ||
| maxThinkingTokens: 4096, | ||
| }, | ||
| } | ||
|
|
||
| const generator = handler.createMessage("", messages, metadata as any) | ||
| await generator.next() | ||
|
|
||
| expect(mockConverseStreamCommand).toHaveBeenCalled() | ||
| const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any | ||
|
|
||
| // Should have temperature but not topP when thinking is enabled | ||
| expect(commandArg.inferenceConfig).toBeDefined() | ||
| // Temperature is overridden to 1.0 when reasoning is enabled | ||
| expect(commandArg.inferenceConfig.temperature).toBe(1.0) | ||
| expect(commandArg.inferenceConfig.topP).toBeUndefined() | ||
| }) | ||
| }) | ||
|
|
||
| describe("Default temperature handling", () => { | ||
| it("should use default temperature when not specified", async () => { | ||
| handler = new AwsBedrockHandler({ | ||
| apiModelId: "anthropic.claude-sonnet-4-5-20250929-v1:0", | ||
| awsAccessKey: "test-access-key", | ||
| awsSecretKey: "test-secret-key", | ||
| awsRegion: "us-east-1", | ||
| // No modelTemperature specified | ||
| }) | ||
|
|
||
| const messages: Anthropic.Messages.MessageParam[] = [ | ||
| { | ||
| role: "user", | ||
| content: "Test message", | ||
| }, | ||
| ] | ||
|
|
||
| const generator = handler.createMessage("", messages) | ||
| await generator.next() | ||
|
|
||
| expect(mockConverseStreamCommand).toHaveBeenCalled() | ||
| const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any | ||
|
|
||
| // Should have default temperature (0.3) but not topP | ||
| expect(commandArg.inferenceConfig).toBeDefined() | ||
| expect(commandArg.inferenceConfig.temperature).toBe(0.3) // BEDROCK_DEFAULT_TEMPERATURE | ||
| expect(commandArg.inferenceConfig.topP).toBeUndefined() | ||
| }) | ||
| }) | ||
| }) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -374,9 +374,9 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH | |
| temperature: modelConfig.temperature ?? (this.options.modelTemperature as number), | ||
| } | ||
|
|
||
| if (!thinkingEnabled) { | ||
| inferenceConfig.topP = 0.1 | ||
| } | ||
| // AWS Bedrock doesn't allow both temperature and topP to be specified for certain models | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. P2: Nice fix. Consider clarifying in a short comment whether "skip topP" is intended as a permanent rule across all Bedrock models, not just Anthropic/Claude. This helps future maintainers avoid reintroducing it in other call paths. |
||
| // When thinking is not enabled and we would normally set topP, we'll skip it to avoid the error | ||
| // This maintains the existing behavior while being compatible with Bedrock's requirements | ||
|
|
||
| // Check if 1M context is enabled for Claude Sonnet 4 | ||
| // Use parseBaseModelId to handle cross-region inference prefixes | ||
|
|
@@ -647,7 +647,8 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH | |
| const inferenceConfig: BedrockInferenceConfig = { | ||
| maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number), | ||
| temperature: modelConfig.temperature ?? (this.options.modelTemperature as number), | ||
| ...(thinkingEnabled ? {} : { topP: 0.1 }), // Only set topP when thinking is NOT enabled | ||
| // AWS Bedrock doesn't allow both temperature and topP to be specified for certain models | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. P2: Since we no longer set topP anywhere for Bedrock, a brief note here like "Bedrock requires choosing between temperature and topP; we standardize on temperature" would make the policy explicit at both call sites. |
||
| // We'll only use temperature to avoid the "cannot both be specified" error | ||
| } | ||
|
|
||
| // For completePrompt, use a unique conversation ID based on the prompt | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
P2: The test hardcodes 0.3 as the default temperature. To reduce brittleness if defaults change, consider centralizing the default in a shared constant in source and importing it here (or deriving it from the provider), rather than duplicating the literal.