Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .changeset/cerebras-conservative-max-tokens.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
"roo-cline": patch
---

fix(cerebras): use conservative max_tokens and add integration header

**Conservative max_tokens:**
The Cerebras rate limiter estimates token consumption upfront from `max_completion_tokens` rather than from actual usage. When agentic tools automatically set this value to the model maximum (e.g., 64K), users exhaust their quota prematurely and get rate-limited despite minimal actual token consumption.

This fix uses a conservative default of 8K (8192) tokens instead of the model maximum. This is sufficient for most agentic tool use while preserving rate-limit headroom.

**Integration header:**
Added the `X-Cerebras-3rd-Party-Integration: roocode` header to all Cerebras API requests for tracking and analytics.
10 changes: 5 additions & 5 deletions packages/types/src/providers/cerebras.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export const cerebrasDefaultModelId: CerebrasModelId = "gpt-oss-120b"

export const cerebrasModels = {
"zai-glm-4.6": {
maxTokens: 16384, // consistent with their other models
maxTokens: 8192, // Conservative default to avoid premature rate limiting (Cerebras reserves quota upfront)
contextWindow: 131072,
supportsImages: false,
supportsPromptCache: false,
Expand All @@ -17,7 +17,7 @@ export const cerebrasModels = {
description: "Highly intelligent general purpose model with up to 1,000 tokens/s",
},
"qwen-3-235b-a22b-instruct-2507": {
maxTokens: 64000,
maxTokens: 8192, // Conservative default to avoid premature rate limiting
contextWindow: 64000,
supportsImages: false,
supportsPromptCache: false,
Expand All @@ -27,7 +27,7 @@ export const cerebrasModels = {
description: "Intelligent model with ~1400 tokens/s",
},
"llama-3.3-70b": {
maxTokens: 64000,
maxTokens: 8192, // Conservative default to avoid premature rate limiting
contextWindow: 64000,
supportsImages: false,
supportsPromptCache: false,
Expand All @@ -37,7 +37,7 @@ export const cerebrasModels = {
description: "Powerful model with ~2600 tokens/s",
},
"qwen-3-32b": {
maxTokens: 64000,
maxTokens: 8192, // Conservative default to avoid premature rate limiting
contextWindow: 64000,
supportsImages: false,
supportsPromptCache: false,
Expand All @@ -47,7 +47,7 @@ export const cerebrasModels = {
description: "SOTA coding performance with ~2500 tokens/s",
},
"gpt-oss-120b": {
maxTokens: 8000,
maxTokens: 8192, // Conservative default to avoid premature rate limiting
contextWindow: 64000,
supportsImages: false,
supportsPromptCache: false,
Expand Down
12 changes: 9 additions & 3 deletions src/api/providers/cerebras.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ import { t } from "../../i18n"
const CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1"
const CEREBRAS_DEFAULT_TEMPERATURE = 0

const CEREBRAS_INTEGRATION_HEADER = "X-Cerebras-3rd-Party-Integration"
const CEREBRAS_INTEGRATION_NAME = "roocode"

export class CerebrasHandler extends BaseProvider implements SingleCompletionHandler {
private apiKey: string
private providerModels: typeof cerebrasModels
Expand All @@ -36,11 +39,12 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
}

getModel(): { id: CerebrasModelId; info: (typeof cerebrasModels)[CerebrasModelId] } {
const modelId = (this.options.apiModelId as CerebrasModelId) || this.defaultProviderModelId
const modelId = this.options.apiModelId as CerebrasModelId
const validModelId = modelId && this.providerModels[modelId] ? modelId : this.defaultProviderModelId

return {
id: modelId,
info: this.providerModels[modelId],
id: validModelId,
info: this.providerModels[validModelId],
}
}

Expand Down Expand Up @@ -130,6 +134,7 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
...DEFAULT_HEADERS,
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
[CEREBRAS_INTEGRATION_HEADER]: CEREBRAS_INTEGRATION_NAME,
},
body: JSON.stringify(requestBody),
})
Expand Down Expand Up @@ -291,6 +296,7 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
...DEFAULT_HEADERS,
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
[CEREBRAS_INTEGRATION_HEADER]: CEREBRAS_INTEGRATION_NAME,
},
body: JSON.stringify(requestBody),
})
Expand Down
Loading