@@ -7,7 +7,7 @@ export const cerebrasDefaultModelId: CerebrasModelId = "gpt-oss-120b"
77
88export const cerebrasModels = {
99 "zai-glm-4.6" : {
10- maxTokens : 16384 , // consistent with their other models
10+ maxTokens : 8192 , // Conservative default to avoid premature rate limiting (Cerebras reserves quota upfront)
1111 contextWindow : 131072 ,
1212 supportsImages : false ,
1313 supportsPromptCache : false ,
@@ -17,7 +17,7 @@ export const cerebrasModels = {
1717 description : "Highly intelligent general purpose model with up to 1,000 tokens/s" ,
1818 } ,
1919 "qwen-3-235b-a22b-instruct-2507" : {
20- maxTokens : 64000 ,
20+ maxTokens : 8192 , // Conservative default to avoid premature rate limiting
2121 contextWindow : 64000 ,
2222 supportsImages : false ,
2323 supportsPromptCache : false ,
@@ -27,7 +27,7 @@ export const cerebrasModels = {
2727 description : "Intelligent model with ~1400 tokens/s" ,
2828 } ,
2929 "llama-3.3-70b" : {
30- maxTokens : 64000 ,
30+ maxTokens : 8192 , // Conservative default to avoid premature rate limiting
3131 contextWindow : 64000 ,
3232 supportsImages : false ,
3333 supportsPromptCache : false ,
@@ -37,7 +37,7 @@ export const cerebrasModels = {
3737 description : "Powerful model with ~2600 tokens/s" ,
3838 } ,
3939 "qwen-3-32b" : {
40- maxTokens : 64000 ,
40+ maxTokens : 8192 , // Conservative default to avoid premature rate limiting
4141 contextWindow : 64000 ,
4242 supportsImages : false ,
4343 supportsPromptCache : false ,
@@ -47,7 +47,7 @@ export const cerebrasModels = {
4747 description : "SOTA coding performance with ~2500 tokens/s" ,
4848 } ,
4949 "gpt-oss-120b" : {
50- maxTokens : 8000 ,
50+ maxTokens : 8192 , // Conservative default to avoid premature rate limiting
5151 contextWindow : 64000 ,
5252 supportsImages : false ,
5353 supportsPromptCache : false ,
0 commit comments