diff --git a/packages/types/src/providers/cerebras.ts b/packages/types/src/providers/cerebras.ts index be705744111..1ac8f637040 100644 --- a/packages/types/src/providers/cerebras.ts +++ b/packages/types/src/providers/cerebras.ts @@ -7,7 +7,7 @@ export const cerebrasDefaultModelId: CerebrasModelId = "gpt-oss-120b" export const cerebrasModels = { "zai-glm-4.6": { - maxTokens: 16384, // consistent with their other models + maxTokens: 8192, // Conservative default to avoid premature rate limiting (Cerebras reserves quota upfront) contextWindow: 131072, supportsImages: false, supportsPromptCache: false, @@ -17,7 +17,7 @@ export const cerebrasModels = { description: "Highly intelligent general purpose model with up to 1,000 tokens/s", }, "qwen-3-235b-a22b-instruct-2507": { - maxTokens: 64000, + maxTokens: 8192, // Conservative default to avoid premature rate limiting contextWindow: 64000, supportsImages: false, supportsPromptCache: false, @@ -27,7 +27,7 @@ export const cerebrasModels = { description: "Intelligent model with ~1400 tokens/s", }, "llama-3.3-70b": { - maxTokens: 64000, + maxTokens: 8192, // Conservative default to avoid premature rate limiting contextWindow: 64000, supportsImages: false, supportsPromptCache: false, @@ -37,7 +37,7 @@ export const cerebrasModels = { description: "Powerful model with ~2600 tokens/s", }, "qwen-3-32b": { - maxTokens: 64000, + maxTokens: 8192, // Conservative default to avoid premature rate limiting contextWindow: 64000, supportsImages: false, supportsPromptCache: false, @@ -47,7 +47,7 @@ export const cerebrasModels = { description: "SOTA coding performance with ~2500 tokens/s", }, "gpt-oss-120b": { - maxTokens: 8000, + maxTokens: 8192, // Conservative default to avoid premature rate limiting contextWindow: 64000, supportsImages: false, supportsPromptCache: false, diff --git a/src/api/providers/cerebras.ts b/src/api/providers/cerebras.ts index 398e32f4901..99e7c4cc3d4 100644 --- a/src/api/providers/cerebras.ts +++ b/src/api/providers/cerebras.ts @@ -16,6 +16,9 @@ import { t } from "../../i18n" const CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1" const CEREBRAS_DEFAULT_TEMPERATURE = 0 +const CEREBRAS_INTEGRATION_HEADER = "X-Cerebras-3rd-Party-Integration" +const CEREBRAS_INTEGRATION_NAME = "roocode" + export class CerebrasHandler extends BaseProvider implements SingleCompletionHandler { private apiKey: string private providerModels: typeof cerebrasModels @@ -36,11 +39,12 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan } getModel(): { id: CerebrasModelId; info: (typeof cerebrasModels)[CerebrasModelId] } { - const modelId = (this.options.apiModelId as CerebrasModelId) || this.defaultProviderModelId + const modelId = this.options.apiModelId as CerebrasModelId + const validModelId = modelId && this.providerModels[modelId] ? modelId : this.defaultProviderModelId return { - id: modelId, - info: this.providerModels[modelId], + id: validModelId, + info: this.providerModels[validModelId], } } @@ -130,6 +134,7 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan ...DEFAULT_HEADERS, "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}`, + [CEREBRAS_INTEGRATION_HEADER]: CEREBRAS_INTEGRATION_NAME, }, body: JSON.stringify(requestBody), }) @@ -291,6 +296,7 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan ...DEFAULT_HEADERS, "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}`, + [CEREBRAS_INTEGRATION_HEADER]: CEREBRAS_INTEGRATION_NAME, }, body: JSON.stringify(requestBody), })