RooCodeInc · mrubens · Dec 4, 2025 · Dec 3, 2025 · Dec 4, 2025 · Dec 4, 2025
diff --git a/.changeset/cerebras-conservative-max-tokens.md b/.changeset/cerebras-conservative-max-tokens.md
@@ -0,0 +1,13 @@
+---
+"roo-cline": patch
+---
+
+fix(cerebras): use conservative max_tokens and add integration header
+
+**Conservative max_tokens:**
+The Cerebras rate limiter estimates token consumption upfront from the requested `max_completion_tokens` value rather than from actual usage. When agentic tools automatically set this value to the model maximum (e.g., 64K), users exhaust their quota prematurely and get rate-limited despite minimal actual token consumption.
+
+This fix uses a conservative default of 8K (8192) tokens instead of the model maximum, which is sufficient for most agentic tool use while preserving rate-limit headroom.
+
+**Integration header:**
+Added the `X-Cerebras-3rd-Party-Integration: roocode` header to all Cerebras API requests for tracking and analytics.
@@ -7,7 +7,7 @@ export const cerebrasDefaultModelId: CerebrasModelId = "gpt-oss-120b"
 
 export const cerebrasModels = {
 	"zai-glm-4.6": {
-		maxTokens: 16384, // consistent with their other models
+		maxTokens: 8192, // Conservative default to avoid premature rate limiting (Cerebras reserves quota upfront)
 		contextWindow: 131072,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -17,7 +17,7 @@ export const cerebrasModels = {
 		description: "Highly intelligent general purpose model with up to 1,000 tokens/s",
 	},
 	"qwen-3-235b-a22b-instruct-2507": {
-		maxTokens: 64000,
+		maxTokens: 8192, // Conservative default to avoid premature rate limiting
 		contextWindow: 64000,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -27,7 +27,7 @@ export const cerebrasModels = {
 		description: "Intelligent model with ~1400 tokens/s",
 	},
 	"llama-3.3-70b": {
-		maxTokens: 64000,
+		maxTokens: 8192, // Conservative default to avoid premature rate limiting
 		contextWindow: 64000,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -37,7 +37,7 @@ export const cerebrasModels = {
 		description: "Powerful model with ~2600 tokens/s",
 	},
 	"qwen-3-32b": {
-		maxTokens: 64000,
+		maxTokens: 8192, // Conservative default to avoid premature rate limiting
 		contextWindow: 64000,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -47,7 +47,7 @@ export const cerebrasModels = {
 		description: "SOTA coding performance with ~2500 tokens/s",
 	},
 	"gpt-oss-120b": {
-		maxTokens: 8000,
+		maxTokens: 8192, // Conservative default to avoid premature rate limiting
 		contextWindow: 64000,
 		supportsImages: false,
 		supportsPromptCache: false,

@@ -16,6 +16,9 @@ import { t } from "../../i18n"
 const CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1"
 const CEREBRAS_DEFAULT_TEMPERATURE = 0
 
+const CEREBRAS_INTEGRATION_HEADER = "X-Cerebras-3rd-Party-Integration"
+const CEREBRAS_INTEGRATION_NAME = "roocode"
+
 export class CerebrasHandler extends BaseProvider implements SingleCompletionHandler {
 	private apiKey: string
 	private providerModels: typeof cerebrasModels
@@ -36,11 +39,12 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
 	}
 
 	getModel(): { id: CerebrasModelId; info: (typeof cerebrasModels)[CerebrasModelId] } {
-		const modelId = (this.options.apiModelId as CerebrasModelId) || this.defaultProviderModelId
+		const modelId = this.options.apiModelId as CerebrasModelId
+		const validModelId = modelId && this.providerModels[modelId] ? modelId : this.defaultProviderModelId
 
 		return {
-			id: modelId,
-			info: this.providerModels[modelId],
+			id: validModelId,
+			info: this.providerModels[validModelId],
 		}
 	}
 
@@ -130,6 +134,7 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
 					...DEFAULT_HEADERS,
 					"Content-Type": "application/json",
 					Authorization: `Bearer ${this.apiKey}`,
+					[CEREBRAS_INTEGRATION_HEADER]: CEREBRAS_INTEGRATION_NAME,
 				},
 				body: JSON.stringify(requestBody),
 			})
@@ -291,6 +296,7 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
 					...DEFAULT_HEADERS,
 					"Content-Type": "application/json",
 					Authorization: `Bearer ${this.apiKey}`,
+					[CEREBRAS_INTEGRATION_HEADER]: CEREBRAS_INTEGRATION_NAME,
 				},
 				body: JSON.stringify(requestBody),
 			})