Skip to content

Commit 88272e9

Browse files
Add Qwen 3 Coder from Cerebras (#6562)
Co-authored-by: Matt Rubens <[email protected]>
1 parent 7cbb37d commit 88272e9

File tree

3 files changed: +45 additions, -16 deletions

packages/types/src/providers/cerebras.ts

Lines changed: 32 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -3,44 +3,64 @@ import type { ModelInfo } from "../model.js"
33
// https://inference-docs.cerebras.ai/api-reference/chat-completions
44
export type CerebrasModelId = keyof typeof cerebrasModels
55

6-
export const cerebrasDefaultModelId: CerebrasModelId = "qwen-3-235b-a22b-instruct-2507"
6+
export const cerebrasDefaultModelId: CerebrasModelId = "qwen-3-coder-480b-free"
77

88
export const cerebrasModels = {
9-
"llama-3.3-70b": {
10-
maxTokens: 64000,
9+
"qwen-3-coder-480b-free": {
10+
maxTokens: 40000,
1111
contextWindow: 64000,
1212
supportsImages: false,
1313
supportsPromptCache: false,
1414
inputPrice: 0,
1515
outputPrice: 0,
16-
description: "Smart model with ~2600 tokens/s",
16+
description:
17+
"SOTA coding model with ~2000 tokens/s ($0 free tier)\n\n• Use this if you don't have a Cerebras subscription\n• 64K context window\n• Rate limits: 150K TPM, 1M TPH/TPD, 10 RPM, 100 RPH/RPD\n\nUpgrade for higher limits: [https://cloud.cerebras.ai/?utm=roocode](https://cloud.cerebras.ai/?utm=roocode)",
1718
},
18-
"qwen-3-32b": {
19+
"qwen-3-coder-480b": {
20+
maxTokens: 40000,
21+
contextWindow: 128000,
22+
supportsImages: false,
23+
supportsPromptCache: false,
24+
inputPrice: 0,
25+
outputPrice: 0,
26+
description:
27+
"SOTA coding model with ~2000 tokens/s ($50/$250 paid tiers)\n\n• Use this if you have a Cerebras subscription\n• 131K context window with higher rate limits",
28+
},
29+
"qwen-3-235b-a22b-instruct-2507": {
1930
maxTokens: 64000,
2031
contextWindow: 64000,
2132
supportsImages: false,
2233
supportsPromptCache: false,
2334
inputPrice: 0,
2435
outputPrice: 0,
25-
description: "SOTA coding performance with ~2500 tokens/s",
36+
description: "Intelligent model with ~1400 tokens/s",
2637
},
27-
"qwen-3-235b-a22b": {
28-
maxTokens: 40000,
29-
contextWindow: 40000,
38+
"llama-3.3-70b": {
39+
maxTokens: 64000,
40+
contextWindow: 64000,
3041
supportsImages: false,
3142
supportsPromptCache: false,
3243
inputPrice: 0,
3344
outputPrice: 0,
34-
description: "SOTA performance with ~1400 tokens/s",
45+
description: "Powerful model with ~2600 tokens/s",
3546
},
36-
"qwen-3-235b-a22b-instruct-2507": {
47+
"qwen-3-32b": {
3748
maxTokens: 64000,
3849
contextWindow: 64000,
3950
supportsImages: false,
4051
supportsPromptCache: false,
4152
inputPrice: 0,
4253
outputPrice: 0,
43-
description: "SOTA performance with ~1400 tokens/s",
54+
description: "SOTA coding performance with ~2500 tokens/s",
55+
},
56+
"qwen-3-235b-a22b-thinking-2507": {
57+
maxTokens: 40000,
58+
contextWindow: 65000,
59+
supportsImages: false,
60+
supportsPromptCache: false,
61+
inputPrice: 0,
62+
outputPrice: 0,
63+
description: "SOTA performance with ~1500 tokens/s",
4464
supportsReasoningEffort: true,
4565
},
4666
} as const satisfies Record<string, ModelInfo>

src/api/providers/__tests__/cerebras.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ describe("CerebrasHandler", () => {
5858
it("should fallback to default model when apiModelId is not provided", () => {
5959
const handlerWithoutModel = new CerebrasHandler({ cerebrasApiKey: "test" })
6060
const { id } = handlerWithoutModel.getModel()
61-
expect(id).toBe("qwen-3-235b-a22b-instruct-2507") // cerebrasDefaultModelId
61+
expect(id).toBe("qwen-3-coder-480b") // cerebrasDefaultModelId (routed)
6262
})
6363
})
6464

src/api/providers/cerebras.ts

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -98,10 +98,19 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
9898
}
9999

100100
getModel(): { id: CerebrasModelId; info: (typeof cerebrasModels)[CerebrasModelId] } {
101-
const modelId = (this.options.apiModelId as CerebrasModelId) || this.defaultProviderModelId
101+
const originalModelId = (this.options.apiModelId as CerebrasModelId) || this.defaultProviderModelId
102+
103+
// Route both qwen coder models to the same actual model ID for API calls
104+
// This allows them to have different rate limits/descriptions in the UI
105+
// while using the same underlying model
106+
let apiModelId = originalModelId
107+
if (originalModelId === "qwen-3-coder-480b-free") {
108+
apiModelId = "qwen-3-coder-480b"
109+
}
110+
102111
return {
103-
id: modelId,
104-
info: this.providerModels[modelId],
112+
id: apiModelId,
113+
info: this.providerModels[originalModelId], // Use original model info for rate limits/descriptions
105114
}
106115
}
107116

0 commit comments

Comments
 (0)