Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions crates/goose/src/agents/platform_extensions/apps.rs
Original file line number Diff line number Diff line change
Expand Up @@ -290,8 +290,7 @@ impl AppsManagerClient {
let messages = vec![Message::user().with_text(&user_prompt)];
let tools = vec![Self::create_app_content_tool()];

let mut model_config = provider.get_model_config();
model_config.max_tokens = Some(16384);
let model_config = provider.get_model_config();

let (response, _usage) = provider
.complete(&model_config, session_id, &system_prompt, &messages, &tools)
Expand Down Expand Up @@ -324,8 +323,7 @@ impl AppsManagerClient {
let messages = vec![Message::user().with_text(&user_prompt)];
let tools = vec![Self::update_app_content_tool()];

let mut model_config = provider.get_model_config();
model_config.max_tokens = Some(16384);
let model_config = provider.get_model_config();

let (response, _usage) = provider
.complete(&model_config, session_id, &system_prompt, &messages, &tools)
Expand Down
3 changes: 1 addition & 2 deletions crates/goose/src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,8 +280,7 @@ impl ModelConfig {
return tokens;
}

// Priority 2: Global default
4_096
16_384
Comment thread
DOsinga marked this conversation as resolved.
Outdated
}

pub fn new_or_fail(model_name: &str) -> ModelConfig {
Expand Down
10 changes: 6 additions & 4 deletions crates/goose/src/providers/formats/anthropic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -431,10 +431,12 @@ pub fn create_request(

let is_thinking_enabled = std::env::var("CLAUDE_THINKING_ENABLED").is_ok();
if is_thinking_enabled {
let budget_tokens = std::env::var("CLAUDE_THINKING_BUDGET")
.unwrap_or_else(|_| "16000".to_string())
.parse()
.unwrap_or(16000);
// Anthropic requires budget_tokens >= 1024
const DEFAULT_THINKING_BUDGET: i32 = 16000;
let budget_tokens: i32 = std::env::var("CLAUDE_THINKING_BUDGET")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(DEFAULT_THINKING_BUDGET);
Comment thread
DOsinga marked this conversation as resolved.
Outdated

payload
.as_object_mut()
Expand Down
48 changes: 23 additions & 25 deletions crates/goose/src/providers/formats/databricks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -613,19 +613,19 @@ pub fn create_request(

let is_thinking_enabled = std::env::var("CLAUDE_THINKING_ENABLED").is_ok();
if is_claude_sonnet && is_thinking_enabled {
// Minimum budget_tokens is 1024
let budget_tokens = std::env::var("CLAUDE_THINKING_BUDGET")
.unwrap_or_else(|_| "16000".to_string())
.parse()
.unwrap_or(16000);

// For Claude models with thinking enabled, we need to add max_tokens + budget_tokens
// Default to 8192 (Claude max output) + budget if not specified
let max_completion_tokens = model_config.max_tokens.unwrap_or(8192);
payload.as_object_mut().unwrap().insert(
"max_tokens".to_string(),
json!(max_completion_tokens + budget_tokens),
);
// Anthropic requires budget_tokens >= 1024
const DEFAULT_THINKING_BUDGET: i32 = 16000;
let budget_tokens: i32 = std::env::var("CLAUDE_THINKING_BUDGET")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(DEFAULT_THINKING_BUDGET);

Copilot AI Feb 24, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment says Anthropic requires budget_tokens >= 1024, but CLAUDE_THINKING_BUDGET is parsed without enforcing that minimum (or preventing negatives), which can lead to Databricks request rejection; clamp/validate the parsed value to at least 1024 before using it.

Suggested change
.unwrap_or(DEFAULT_THINKING_BUDGET);
.unwrap_or(DEFAULT_THINKING_BUDGET)
.max(1024);

Copilot uses AI. Check for mistakes.

// With thinking enabled, max_tokens must include both output and thinking budget
let max_tokens = model_config.max_output_tokens() + budget_tokens;
payload
.as_object_mut()
.unwrap()
.insert("max_tokens".to_string(), json!(max_tokens));
Comment on lines +623 to +628

Copilot AI Feb 24, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

model_config.max_output_tokens() + budget_tokens is an i32 addition that can overflow if CLAUDE_THINKING_BUDGET is set large, which can wrap to a negative max_tokens in release builds; use checked/saturating arithmetic (or compute in i64 and clamp) before serializing.

Copilot uses AI. Check for mistakes.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i32 is 2 billion


payload.as_object_mut().unwrap().insert(
"thinking".to_string(),
Expand All @@ -650,18 +650,16 @@ pub fn create_request(
}
}

// open ai reasoning models use max_completion_tokens instead of max_tokens
if let Some(tokens) = model_config.max_tokens {
let key = if is_openai_reasoning_model {
"max_completion_tokens"
} else {
"max_tokens"
};
payload
.as_object_mut()
.unwrap()
.insert(key.to_string(), json!(tokens));
}
// OpenAI reasoning models use max_completion_tokens instead of max_tokens
let key = if is_openai_reasoning_model {
"max_completion_tokens"
} else {
"max_tokens"
};
payload
.as_object_mut()
.unwrap()
.insert(key.to_string(), json!(model_config.max_output_tokens()));
}

// Apply cache control for Claude models to enable prompt caching
Expand Down
17 changes: 5 additions & 12 deletions crates/goose/src/providers/formats/google.rs
Original file line number Diff line number Diff line change
Expand Up @@ -592,18 +592,11 @@ pub fn create_request(

let thinking_config = get_thinking_config(model_config);

let generation_config = if model_config.temperature.is_some()
|| model_config.max_tokens.is_some()
|| thinking_config.is_some()
{
Some(GenerationConfig {
temperature: model_config.temperature.map(|t| t as f64),
max_output_tokens: model_config.max_tokens,
thinking_config,
})
} else {
None
};
let generation_config = Some(GenerationConfig {
temperature: model_config.temperature.map(|t| t as f64),
max_output_tokens: Some(model_config.max_output_tokens()),
thinking_config,
});

let request = GoogleRequest {
system_instruction: SystemInstruction {
Expand Down
22 changes: 10 additions & 12 deletions crates/goose/src/providers/formats/openai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -828,18 +828,16 @@ pub fn create_request(
}
}

// o1 models use max_completion_tokens instead of max_tokens
if let Some(tokens) = model_config.max_tokens {
let key = if is_ox_model {
"max_completion_tokens"
} else {
"max_tokens"
};
payload
.as_object_mut()
.unwrap()
.insert(key.to_string(), json!(tokens));
}
// o1/o3 models use max_completion_tokens instead of max_tokens
let key = if is_ox_model {
"max_completion_tokens"
} else {
"max_tokens"
};
payload
.as_object_mut()
.unwrap()
.insert(key.to_string(), json!(model_config.max_output_tokens()));

if for_streaming {
payload["stream"] = json!(true);
Expand Down
10 changes: 4 additions & 6 deletions crates/goose/src/providers/formats/openai_responses.rs
Original file line number Diff line number Diff line change
Expand Up @@ -460,12 +460,10 @@ pub fn create_responses_request(
.insert("temperature".to_string(), json!(temp));
}

if let Some(tokens) = model_config.max_tokens {
payload
.as_object_mut()
.unwrap()
.insert("max_output_tokens".to_string(), json!(tokens));
}
payload.as_object_mut().unwrap().insert(
"max_output_tokens".to_string(),
json!(model_config.max_output_tokens()),
);

Ok(payload)
}
Expand Down
3 changes: 1 addition & 2 deletions crates/goose/src/providers/formats/snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,11 +353,10 @@ pub fn create_request(
format_tools(tools)
};

let max_tokens = model_config.max_tokens.unwrap_or(4096);
let mut payload = json!({
"model": model_config.model_name,
"messages": snowflake_messages,
"max_tokens": max_tokens,
"max_tokens": model_config.max_output_tokens(),
});

// Add tools if present and not a description request
Expand Down
2 changes: 1 addition & 1 deletion crates/goose/src/providers/sagemaker_tgi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ impl SageMakerTgiProvider {
let request = json!({
"inputs": prompt,
"parameters": {
"max_new_tokens": self.model.max_tokens.unwrap_or(150),
"max_new_tokens": self.model.max_output_tokens(),
"temperature": self.model.temperature.unwrap_or(0.7),
Comment on lines 151 to 153

Copilot AI Feb 24, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Switching TGI's max_new_tokens from a small provider-specific default (150) to model.max_output_tokens() means an unset/unknown model will now request 16,384 tokens by default, which is likely to cause slow/expensive generations or SageMaker endpoint errors; consider keeping a conservative TGI fallback (e.g., use max_tokens only when explicitly set, otherwise default to ~150 or a configurable env var).

Copilot uses AI. Check for mistakes.
"do_sample": true,
"return_full_text": false
Expand Down
Loading