Merged
6 changes: 2 additions & 4 deletions crates/goose/src/agents/platform_extensions/apps.rs
@@ -290,8 +290,7 @@ impl AppsManagerClient {
let messages = vec![Message::user().with_text(&user_prompt)];
let tools = vec![Self::create_app_content_tool()];

-let mut model_config = provider.get_model_config();
-model_config.max_tokens = Some(16384);
+let model_config = provider.get_model_config();

let (response, _usage) = provider
.complete(&model_config, session_id, &system_prompt, &messages, &tools)
@@ -324,8 +323,7 @@ impl AppsManagerClient {
let messages = vec![Message::user().with_text(&user_prompt)];
let tools = vec![Self::update_app_content_tool()];

-let mut model_config = provider.get_model_config();
-model_config.max_tokens = Some(16384);
+let model_config = provider.get_model_config();

let (response, _usage) = provider
.complete(&model_config, session_id, &system_prompt, &messages, &tools)
1 change: 0 additions & 1 deletion crates/goose/src/model.rs
@@ -280,7 +280,6 @@ impl ModelConfig {
return tokens;
}

-// Priority 2: Global default
4_096
Contributor
Was separately thinking about making this bigger. Too small seems strictly worse as it can lead to truncation.

Collaborator Author
I had it higher for a bit, but presumably we almost always have a real value here, so erring on the lower side is probably better.

}

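The thread above concerns the fallback in `ModelConfig`. As a reading aid, here is a minimal sketch of the resolution the removed comment was annotating, assuming the two-priority order the hunk implies (an explicit value first, then the 4_096 global default); the struct shape and integer type are assumptions, not the crate's real definition, and the real method may consult model-specific limits as well:

```rust
// Minimal sketch, not the actual goose ModelConfig: the field and method
// names mirror the diff, but the types and fallback chain are assumptions.
struct ModelConfig {
    max_tokens: Option<i32>,
}

impl ModelConfig {
    fn max_output_tokens(&self) -> i32 {
        // Priority 1: explicitly configured value
        if let Some(tokens) = self.max_tokens {
            return tokens;
        }
        // Priority 2: global default
        4_096
    }
}
```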
11 changes: 7 additions & 4 deletions crates/goose/src/providers/formats/anthropic.rs
@@ -431,10 +431,13 @@ pub fn create_request(

let is_thinking_enabled = std::env::var("CLAUDE_THINKING_ENABLED").is_ok();
if is_thinking_enabled {
-let budget_tokens = std::env::var("CLAUDE_THINKING_BUDGET")
-    .unwrap_or_else(|_| "16000".to_string())
-    .parse()
-    .unwrap_or(16000);
+// Anthropic requires budget_tokens >= 1024
+const DEFAULT_THINKING_BUDGET: i32 = 16000;
+let raw_budget_tokens: i32 = std::env::var("CLAUDE_THINKING_BUDGET")
+    .ok()
+    .and_then(|s| s.parse().ok())
+    .unwrap_or(DEFAULT_THINKING_BUDGET);
+let budget_tokens: i32 = std::cmp::max(1024, raw_budget_tokens);

payload
.as_object_mut()
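To make the new parsing behavior concrete, here is a hedged, self-contained sketch of the logic above, with the parse factored into a helper so the clamp can be exercised without touching the process environment; only the constant, the env-var name, and the 1024 floor come from the diff:

```rust
const DEFAULT_THINKING_BUDGET: i32 = 16000;

// Illustrative helper (not code from the PR): parse an optional raw value,
// fall back to the default, and clamp to Anthropic's 1024 minimum.
fn parse_budget(raw: Option<&str>) -> i32 {
    raw.and_then(|s| s.parse().ok())
        .unwrap_or(DEFAULT_THINKING_BUDGET)
        .max(1024)
}

fn thinking_budget() -> i32 {
    parse_budget(std::env::var("CLAUDE_THINKING_BUDGET").ok().as_deref())
}

fn main() {
    assert_eq!(parse_budget(Some("512")), 1024); // sub-minimum values clamp up
    assert_eq!(parse_budget(Some("junk")), 16000); // unparsable falls back
    assert_eq!(parse_budget(None), 16000); // unset falls back
    let _ = thinking_budget(); // reads the real environment
}
```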
48 changes: 23 additions & 25 deletions crates/goose/src/providers/formats/databricks.rs
@@ -613,19 +613,19 @@ pub fn create_request(

let is_thinking_enabled = std::env::var("CLAUDE_THINKING_ENABLED").is_ok();
if is_claude_sonnet && is_thinking_enabled {
-// Minimum budget_tokens is 1024
-let budget_tokens = std::env::var("CLAUDE_THINKING_BUDGET")
-    .unwrap_or_else(|_| "16000".to_string())
-    .parse()
-    .unwrap_or(16000);
-
-// For Claude models with thinking enabled, we need to add max_tokens + budget_tokens
-// Default to 8192 (Claude max output) + budget if not specified
-let max_completion_tokens = model_config.max_tokens.unwrap_or(8192);
-payload.as_object_mut().unwrap().insert(
-    "max_tokens".to_string(),
-    json!(max_completion_tokens + budget_tokens),
-);
+// Anthropic requires budget_tokens >= 1024
+const DEFAULT_THINKING_BUDGET: i32 = 16000;
+let budget_tokens: i32 = std::env::var("CLAUDE_THINKING_BUDGET")
+    .ok()
+    .and_then(|s| s.parse().ok())
+    .unwrap_or(DEFAULT_THINKING_BUDGET);
Copilot AI Feb 24, 2026

The comment says Anthropic requires budget_tokens >= 1024, but CLAUDE_THINKING_BUDGET is parsed without enforcing that minimum (or preventing negatives), which can lead to Databricks request rejection; clamp/validate the parsed value to at least 1024 before using it.

Suggested change
-    .unwrap_or(DEFAULT_THINKING_BUDGET);
+    .unwrap_or(DEFAULT_THINKING_BUDGET)
+    .max(1024);


+// With thinking enabled, max_tokens must include both output and thinking budget
+let max_tokens = model_config.max_output_tokens() + budget_tokens;
+payload
+    .as_object_mut()
+    .unwrap()
+    .insert("max_tokens".to_string(), json!(max_tokens));
Comment on lines +623 to +628
Copilot AI Feb 24, 2026

model_config.max_output_tokens() + budget_tokens is an i32 addition that can overflow if CLAUDE_THINKING_BUDGET is set large, which can wrap to a negative max_tokens in release builds; use checked/saturating arithmetic (or compute in i64 and clamp) before serializing.

Collaborator Author
i32 is 2 billion


payload.as_object_mut().unwrap().insert(
"thinking".to_string(),
@@ -650,18 +650,16 @@ pub fn create_request(
}
}

-// open ai reasoning models use max_completion_tokens instead of max_tokens
-if let Some(tokens) = model_config.max_tokens {
-    let key = if is_openai_reasoning_model {
-        "max_completion_tokens"
-    } else {
-        "max_tokens"
-    };
-    payload
-        .as_object_mut()
-        .unwrap()
-        .insert(key.to_string(), json!(tokens));
-}
+// OpenAI reasoning models use max_completion_tokens instead of max_tokens
+let key = if is_openai_reasoning_model {
+    "max_completion_tokens"
+} else {
+    "max_tokens"
+};
+payload
+    .as_object_mut()
+    .unwrap()
+    .insert(key.to_string(), json!(model_config.max_output_tokens()));
}

// Apply cache control for Claude models to enable prompt caching
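On the overflow exchange above: the PR keeps the plain `+`, and the author's point stands for realistic budgets, but the saturating variant Copilot hints at is a one-line change. A sketch, with the standalone function being illustrative rather than code from the PR:

```rust
// Illustrative only: saturating_add caps at i32::MAX instead of wrapping to
// a negative value in release builds if the thinking budget is pathological.
fn total_max_tokens(max_output_tokens: i32, budget_tokens: i32) -> i32 {
    max_output_tokens.saturating_add(budget_tokens)
}

fn main() {
    assert_eq!(total_max_tokens(8192, 16000), 24192); // the ordinary case
    assert_eq!(total_max_tokens(8192, i32::MAX), i32::MAX); // saturates, no wrap
}
```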
17 changes: 5 additions & 12 deletions crates/goose/src/providers/formats/google.rs
@@ -592,18 +592,11 @@ pub fn create_request(

let thinking_config = get_thinking_config(model_config);

-let generation_config = if model_config.temperature.is_some()
-    || model_config.max_tokens.is_some()
-    || thinking_config.is_some()
-{
-    Some(GenerationConfig {
-        temperature: model_config.temperature.map(|t| t as f64),
-        max_output_tokens: model_config.max_tokens,
-        thinking_config,
-    })
-} else {
-    None
-};
+let generation_config = Some(GenerationConfig {
+    temperature: model_config.temperature.map(|t| t as f64),
+    max_output_tokens: Some(model_config.max_output_tokens()),
+    thinking_config,
+});

let request = GoogleRequest {
system_instruction: SystemInstruction {
22 changes: 10 additions & 12 deletions crates/goose/src/providers/formats/openai.rs
@@ -828,18 +828,16 @@ pub fn create_request(
}
}

-// o1 models use max_completion_tokens instead of max_tokens
-if let Some(tokens) = model_config.max_tokens {
-    let key = if is_ox_model {
-        "max_completion_tokens"
-    } else {
-        "max_tokens"
-    };
-    payload
-        .as_object_mut()
-        .unwrap()
-        .insert(key.to_string(), json!(tokens));
-}
+// o1/o3 models use max_completion_tokens instead of max_tokens
+let key = if is_ox_model {
+    "max_completion_tokens"
+} else {
+    "max_tokens"
+};
+payload
+    .as_object_mut()
+    .unwrap()
+    .insert(key.to_string(), json!(model_config.max_output_tokens()));

if for_streaming {
payload["stream"] = json!(true);
10 changes: 4 additions & 6 deletions crates/goose/src/providers/formats/openai_responses.rs
@@ -460,12 +460,10 @@ pub fn create_responses_request(
.insert("temperature".to_string(), json!(temp));
}

-if let Some(tokens) = model_config.max_tokens {
-    payload
-        .as_object_mut()
-        .unwrap()
-        .insert("max_output_tokens".to_string(), json!(tokens));
-}
+payload.as_object_mut().unwrap().insert(
+    "max_output_tokens".to_string(),
+    json!(model_config.max_output_tokens()),
+);

Ok(payload)
}
3 changes: 1 addition & 2 deletions crates/goose/src/providers/formats/snowflake.rs
@@ -353,11 +353,10 @@ pub fn create_request(
format_tools(tools)
};

-let max_tokens = model_config.max_tokens.unwrap_or(4096);
let mut payload = json!({
"model": model_config.model_name,
"messages": snowflake_messages,
"max_tokens": max_tokens,
"max_tokens": model_config.max_output_tokens(),
});

// Add tools if present and not a description request
2 changes: 1 addition & 1 deletion crates/goose/src/providers/sagemaker_tgi.rs
@@ -149,7 +149,7 @@ impl SageMakerTgiProvider {
let request = json!({
"inputs": prompt,
"parameters": {
"max_new_tokens": self.model.max_tokens.unwrap_or(150),
"max_new_tokens": self.model.max_output_tokens(),
"temperature": self.model.temperature.unwrap_or(0.7),
Comment on lines 151 to 153
Copilot AI Feb 24, 2026

Switching TGI's max_new_tokens from a small provider-specific default (150) to model.max_output_tokens() means an unset/unknown model will now request 16,384 tokens by default, which is likely to cause slow/expensive generations or SageMaker endpoint errors; consider keeping a conservative TGI fallback (e.g., use max_tokens only when explicitly set, otherwise default to ~150 or a configurable env var).

"do_sample": true,
"return_full_text": false
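A sketch of the conservative fallback the review comment suggests: honor an explicitly configured limit, otherwise keep TGI's small 150-token default rather than inheriting the global one. The helper is hypothetical, and the 16,384 figure comes from the comment above, not from code shown here:

```rust
// Hypothetical helper, not part of the PR: explicit configuration wins,
// otherwise stay with the conservative TGI default of 150 new tokens.
fn tgi_max_new_tokens(configured_max_tokens: Option<i32>) -> i32 {
    configured_max_tokens.unwrap_or(150)
}

fn main() {
    assert_eq!(tgi_max_new_tokens(Some(1024)), 1024); // explicit value wins
    assert_eq!(tgi_max_new_tokens(None), 150); // unset stays conservative
}
```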