Skip to content

Commit 94c997c

Browse files
Fix/cerebras conservative max tokens (#9804)
Co-authored-by: Matt Rubens <[email protected]>
1 parent 573cfc3 commit 94c997c

File tree

2 files changed

+14
-8
lines changed

2 files changed

+14
-8
lines changed

packages/types/src/providers/cerebras.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ export const cerebrasDefaultModelId: CerebrasModelId = "gpt-oss-120b"
77

88
export const cerebrasModels = {
99
"zai-glm-4.6": {
10-
maxTokens: 16384, // consistent with their other models
10+
maxTokens: 8192, // Conservative default to avoid premature rate limiting (Cerebras reserves quota upfront)
1111
contextWindow: 131072,
1212
supportsImages: false,
1313
supportsPromptCache: false,
@@ -17,7 +17,7 @@ export const cerebrasModels = {
1717
description: "Highly intelligent general purpose model with up to 1,000 tokens/s",
1818
},
1919
"qwen-3-235b-a22b-instruct-2507": {
20-
maxTokens: 64000,
20+
maxTokens: 8192, // Conservative default to avoid premature rate limiting
2121
contextWindow: 64000,
2222
supportsImages: false,
2323
supportsPromptCache: false,
@@ -27,7 +27,7 @@ export const cerebrasModels = {
2727
description: "Intelligent model with ~1400 tokens/s",
2828
},
2929
"llama-3.3-70b": {
30-
maxTokens: 64000,
30+
maxTokens: 8192, // Conservative default to avoid premature rate limiting
3131
contextWindow: 64000,
3232
supportsImages: false,
3333
supportsPromptCache: false,
@@ -37,7 +37,7 @@ export const cerebrasModels = {
3737
description: "Powerful model with ~2600 tokens/s",
3838
},
3939
"qwen-3-32b": {
40-
maxTokens: 64000,
40+
maxTokens: 8192, // Conservative default to avoid premature rate limiting
4141
contextWindow: 64000,
4242
supportsImages: false,
4343
supportsPromptCache: false,
@@ -47,7 +47,7 @@ export const cerebrasModels = {
4747
description: "SOTA coding performance with ~2500 tokens/s",
4848
},
4949
"gpt-oss-120b": {
50-
maxTokens: 8000,
50+
maxTokens: 8192, // Conservative default to avoid premature rate limiting
5151
contextWindow: 64000,
5252
supportsImages: false,
5353
supportsPromptCache: false,

src/api/providers/cerebras.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ import { t } from "../../i18n"
1616
const CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1"
1717
const CEREBRAS_DEFAULT_TEMPERATURE = 0
1818

19+
const CEREBRAS_INTEGRATION_HEADER = "X-Cerebras-3rd-Party-Integration"
20+
const CEREBRAS_INTEGRATION_NAME = "roocode"
21+
1922
export class CerebrasHandler extends BaseProvider implements SingleCompletionHandler {
2023
private apiKey: string
2124
private providerModels: typeof cerebrasModels
@@ -36,11 +39,12 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
3639
}
3740

3841
getModel(): { id: CerebrasModelId; info: (typeof cerebrasModels)[CerebrasModelId] } {
39-
const modelId = (this.options.apiModelId as CerebrasModelId) || this.defaultProviderModelId
42+
const modelId = this.options.apiModelId as CerebrasModelId
43+
const validModelId = modelId && this.providerModels[modelId] ? modelId : this.defaultProviderModelId
4044

4145
return {
42-
id: modelId,
43-
info: this.providerModels[modelId],
46+
id: validModelId,
47+
info: this.providerModels[validModelId],
4448
}
4549
}
4650

@@ -130,6 +134,7 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
130134
...DEFAULT_HEADERS,
131135
"Content-Type": "application/json",
132136
Authorization: `Bearer ${this.apiKey}`,
137+
[CEREBRAS_INTEGRATION_HEADER]: CEREBRAS_INTEGRATION_NAME,
133138
},
134139
body: JSON.stringify(requestBody),
135140
})
@@ -291,6 +296,7 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
291296
...DEFAULT_HEADERS,
292297
"Content-Type": "application/json",
293298
Authorization: `Bearer ${this.apiKey}`,
299+
[CEREBRAS_INTEGRATION_HEADER]: CEREBRAS_INTEGRATION_NAME,
294300
},
295301
body: JSON.stringify(requestBody),
296302
})

0 commit comments

Comments
 (0)