Add Qwen 3 Coder from Cerebras (#6562)

kevint-cerebras · mrubens · web-flow · commit 88272e9b0248 · 2025-08-01T15:29:33.000-04:00
Co-authored-by: Matt Rubens <mrubens@users.noreply.github.com>
diff --git a/packages/types/src/providers/cerebras.ts b/packages/types/src/providers/cerebras.ts
@@ -3,44 +3,64 @@ import type { ModelInfo } from "../model.js"
 // https://inference-docs.cerebras.ai/api-reference/chat-completions
 export type CerebrasModelId = keyof typeof cerebrasModels
 
-export const cerebrasDefaultModelId: CerebrasModelId = "qwen-3-235b-a22b-instruct-2507"
+export const cerebrasDefaultModelId: CerebrasModelId = "qwen-3-coder-480b-free"
 
 export const cerebrasModels = {
-	"llama-3.3-70b": {
-		maxTokens: 64000,
+	"qwen-3-coder-480b-free": {
+		maxTokens: 40000,
 		contextWindow: 64000,
 		supportsImages: false,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		description: "Smart model with ~2600 tokens/s",
+		description:
+			"SOTA coding model with ~2000 tokens/s ($0 free tier)\n\n• Use this if you don't have a Cerebras subscription\n• 64K context window\n• Rate limits: 150K TPM, 1M TPH/TPD, 10 RPM, 100 RPH/RPD\n\nUpgrade for higher limits: [https://cloud.cerebras.ai/?utm=roocode](https://cloud.cerebras.ai/?utm=roocode)",
 	},
-	"qwen-3-32b": {
+	"qwen-3-coder-480b": {
+		maxTokens: 40000,
+		contextWindow: 128000,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		description:
+			"SOTA coding model with ~2000 tokens/s ($50/$250 paid tiers)\n\n• Use this if you have a Cerebras subscription\n• 131K context window with higher rate limits",
+	},
+	"qwen-3-235b-a22b-instruct-2507": {
 		maxTokens: 64000,
 		contextWindow: 64000,
 		supportsImages: false,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		description: "SOTA coding performance with ~2500 tokens/s",
+		description: "Intelligent model with ~1400 tokens/s",
 	},
-	"qwen-3-235b-a22b": {
-		maxTokens: 40000,
-		contextWindow: 40000,
+	"llama-3.3-70b": {
+		maxTokens: 64000,
+		contextWindow: 64000,
 		supportsImages: false,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		description: "SOTA performance with ~1400 tokens/s",
+		description: "Powerful model with ~2600 tokens/s",
 	},
-	"qwen-3-235b-a22b-instruct-2507": {
+	"qwen-3-32b": {
 		maxTokens: 64000,
 		contextWindow: 64000,
 		supportsImages: false,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		description: "SOTA performance with ~1400 tokens/s",
+		description: "SOTA coding performance with ~2500 tokens/s",
+	},
+	"qwen-3-235b-a22b-thinking-2507": {
+		maxTokens: 40000,
+		contextWindow: 65000,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		description: "SOTA performance with ~1500 tokens/s",
 		supportsReasoningEffort: true,
 	},
 } as const satisfies Record<string, ModelInfo>
diff --git a/src/api/providers/__tests__/cerebras.spec.ts b/src/api/providers/__tests__/cerebras.spec.ts
@@ -58,7 +58,7 @@ describe("CerebrasHandler", () => {
 		it("should fallback to default model when apiModelId is not provided", () => {
 			const handlerWithoutModel = new CerebrasHandler({ cerebrasApiKey: "test" })
 			const { id } = handlerWithoutModel.getModel()
-			expect(id).toBe("qwen-3-235b-a22b-instruct-2507") // cerebrasDefaultModelId
+			expect(id).toBe("qwen-3-coder-480b") // cerebrasDefaultModelId (routed)
 		})
 	})
 
diff --git a/src/api/providers/cerebras.ts b/src/api/providers/cerebras.ts
@@ -98,10 +98,19 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
 	}
 
 	getModel(): { id: CerebrasModelId; info: (typeof cerebrasModels)[CerebrasModelId] } {
-		const modelId = (this.options.apiModelId as CerebrasModelId) || this.defaultProviderModelId
+		const originalModelId = (this.options.apiModelId as CerebrasModelId) || this.defaultProviderModelId
+
+		// "qwen-3-coder-480b-free" is a UI-only alias: it is listed separately so the
+		// free tier can show its own context window, rate limits, and description,
+		// but the id returned for API calls is the real model, "qwen-3-coder-480b".
+		let apiModelId = originalModelId
+		if (originalModelId === "qwen-3-coder-480b-free") {
+			apiModelId = "qwen-3-coder-480b"
+		}
+
 		return {
-			id: modelId,
-			info: this.providerModels[modelId],
+			id: apiModelId,
+			info: this.providerModels[originalModelId], // Use original model info for rate limits/descriptions
 		}
 	}