Merged
1 change: 1 addition & 0 deletions crates/goose/src/context_mgmt/mod.rs
@@ -564,6 +564,7 @@ mod tests {
 toolshim_model: None,
 fast_model_config: None,
 request_params: None,
+reasoning: None,
 },
 max_tool_responses: None,
 }
118 changes: 107 additions & 11 deletions crates/goose/src/model.rs
@@ -57,6 +57,8 @@ pub struct ModelConfig {
 /// Provider-specific request parameters (e.g., anthropic_beta headers)
 #[serde(default, skip_serializing_if = "Option::is_none")]
 pub request_params: Option<HashMap<String, Value>>,
+#[serde(default, skip_serializing_if = "Option::is_none")]
+pub reasoning: Option<bool>,
 }

 impl ModelConfig {
@@ -104,21 +106,22 @@ impl ModelConfig {
 toolshim_model,
 fast_model_config: None,
 request_params,
+reasoning: None,
 })
 }

 pub fn with_canonical_limits(mut self, provider_name: &str) -> Self {
-    if self.context_limit.is_none() || self.max_tokens.is_none() {
-        if let Some(canonical) = crate::providers::canonical::maybe_get_canonical_model(
-            provider_name,
-            &self.model_name,
-        ) {
-            if self.context_limit.is_none() {
-                self.context_limit = Some(canonical.limit.context);
-            }
-            if self.max_tokens.is_none() {
-                self.max_tokens = canonical.limit.output.map(|o| o as i32);
-            }
+    if let Some(canonical) =
+        crate::providers::canonical::maybe_get_canonical_model(provider_name, &self.model_name)
+    {
+        if self.context_limit.is_none() {
+            self.context_limit = Some(canonical.limit.context);
+        }
+        if self.max_tokens.is_none() {
+            self.max_tokens = canonical.limit.output.map(|o| o as i32);
+        }
+        if self.reasoning.is_none() {
+            self.reasoning = canonical.reasoning;
+        }
     }

@@ -275,6 +278,18 @@
 self.context_limit.unwrap_or(DEFAULT_CONTEXT_LIMIT)
 }

+pub fn is_openai_reasoning_model(&self) -> bool {
+    const DATABRICKS_MODEL_NAME_PREFIXES: &[&str] = &["goose-", "databricks-"];
+    const REASONING_PREFIXES: &[&str] = &["o1", "o3", "o4", "gpt-5"];
P2: Keep o2 models classified as reasoning

is_openai_reasoning_model no longer treats o2* names as reasoning models because REASONING_PREFIXES dropped "o2". This helper now drives request shaping in both the OpenAI and Databricks formatters, so o2 requests will be sent down the non-reasoning path (with temperature set and without reasoning-specific handling), which can trigger 400s or behavior mismatches on endpoints that still expose o2 under o-series semantics.
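A minimal sketch of the fix this comment implies: reinstate "o2" in the prefix list and pin it with a regression test (the test name and placement are suggestions, not part of this PR):

const REASONING_PREFIXES: &[&str] = &["o1", "o2", "o3", "o4", "gpt-5"];

#[test]
fn o2_models_are_reasoning() {
    // Hypothetical regression test: o2* names should keep o-series semantics.
    assert!(ModelConfig::new_or_fail("o2").is_openai_reasoning_model());
    assert!(ModelConfig::new_or_fail("goose-o2-mini").is_openai_reasoning_model());
    assert!(ModelConfig::new_or_fail("databricks-o2").is_openai_reasoning_model());
}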


+    let base = DATABRICKS_MODEL_NAME_PREFIXES
+        .iter()
+        .find_map(|p| self.model_name.strip_prefix(p))
+        .unwrap_or(&self.model_name);
+
+    REASONING_PREFIXES.iter().any(|p| base.starts_with(p))
+}

 pub fn max_output_tokens(&self) -> i32 {
     if let Some(tokens) = self.max_tokens {
         return tokens;
@@ -408,4 +423,85 @@ mod tests {
 None
 );
 }
+
+mod with_canonical_limits {
+    use super::*;
+
+    #[test]
+    fn sets_limits_from_canonical_model() {
+        let config = ModelConfig::new_or_fail("gpt-4o").with_canonical_limits("openai");
+
+        assert_eq!(config.context_limit, Some(128_000));
+        assert_eq!(config.max_tokens, Some(16_384));
+        assert_eq!(config.reasoning, Some(false));
+    }
+
+    #[test]
+    fn does_not_override_existing_context_limit() {
+        let mut config = ModelConfig::new_or_fail("gpt-4o");
+        config.context_limit = Some(64_000);
+        let config = config.with_canonical_limits("openai");
+
+        assert_eq!(config.context_limit, Some(64_000));
+    }
+
+    #[test]
+    fn does_not_override_existing_max_tokens() {
+        let mut config = ModelConfig::new_or_fail("gpt-4o");
+        config.max_tokens = Some(1_000);
+        let config = config.with_canonical_limits("openai");
+
+        assert_eq!(config.max_tokens, Some(1_000));
+    }
+
+    #[test]
+    fn unknown_model_leaves_fields_none() {
+        let config =
+            ModelConfig::new_or_fail("totally-unknown-model").with_canonical_limits("openai");
+
+        assert_eq!(config.context_limit, None);
+        assert_eq!(config.max_tokens, None);
+        assert_eq!(config.reasoning, None);
+    }
+}
+
+mod is_openai_reasoning_model {
+    use super::*;
+
+    #[test]
+    fn bare_reasoning_models() {
+        assert!(ModelConfig::new_or_fail("o1").is_openai_reasoning_model());
+        assert!(ModelConfig::new_or_fail("o1-preview").is_openai_reasoning_model());
+        assert!(ModelConfig::new_or_fail("o3").is_openai_reasoning_model());
+        assert!(ModelConfig::new_or_fail("o3-mini").is_openai_reasoning_model());
+        assert!(ModelConfig::new_or_fail("o4-mini").is_openai_reasoning_model());
+        assert!(ModelConfig::new_or_fail("gpt-5").is_openai_reasoning_model());
+        assert!(ModelConfig::new_or_fail("gpt-5-3-codex").is_openai_reasoning_model());
+    }
+
+    #[test]
+    fn goose_prefixed_reasoning_models() {
+        assert!(ModelConfig::new_or_fail("goose-o3-mini").is_openai_reasoning_model());
+        assert!(ModelConfig::new_or_fail("goose-o4-mini").is_openai_reasoning_model());
+        assert!(ModelConfig::new_or_fail("goose-gpt-5").is_openai_reasoning_model());
+    }
+
+    #[test]
+    fn databricks_prefixed_reasoning_models() {
+        assert!(ModelConfig::new_or_fail("databricks-o3-mini").is_openai_reasoning_model());
+        assert!(ModelConfig::new_or_fail("databricks-o4-mini").is_openai_reasoning_model());
+        assert!(ModelConfig::new_or_fail("databricks-gpt-5").is_openai_reasoning_model());
+    }
+
+    #[test]
+    fn non_reasoning_models() {
+        assert!(!ModelConfig::new_or_fail("claude-sonnet-4").is_openai_reasoning_model());
+        assert!(!ModelConfig::new_or_fail("gpt-4o").is_openai_reasoning_model());
+        assert!(
+            !ModelConfig::new_or_fail("databricks-claude-sonnet-4").is_openai_reasoning_model()
+        );
+        assert!(!ModelConfig::new_or_fail("goose-claude-sonnet-4").is_openai_reasoning_model());
+        assert!(!ModelConfig::new_or_fail("llama-3-70b").is_openai_reasoning_model());
+    }
+}
 }
1 change: 1 addition & 0 deletions crates/goose/src/providers/bedrock.rs
@@ -429,6 +429,7 @@ mod tests {
 toolshim_model: None,
 fast_model_config: None,
 request_params: None,
+reasoning: None,
 },
 retry_config: RetryConfig::default(),
 name: "aws_bedrock".to_string(),
2 changes: 0 additions & 2 deletions crates/goose/src/providers/databricks.rs
@@ -46,10 +46,8 @@ pub const DATABRICKS_DEFAULT_MODEL: &str = "databricks-claude-sonnet-4";
 const DATABRICKS_DEFAULT_FAST_MODEL: &str = "databricks-claude-haiku-4-5";
 pub const DATABRICKS_KNOWN_MODELS: &[&str] = &[
     "databricks-claude-sonnet-4-5",
-    "databricks-claude-3-7-sonnet",
     "databricks-meta-llama-3-3-70b-instruct",
     "databricks-meta-llama-3-1-405b-instruct",
P2: Restore removed Databricks models in known model metadata (on lines 47 to 50)

DATABRICKS_KNOWN_MODELS now omits databricks-claude-3-7-sonnet and databricks-dbrx-instruct, but this constant is still passed into ProviderMetadata::new(...) for the Databricks provider and is used as the static/fallback model catalog exposed to clients. Removing these entries makes still-referenced Databricks models undiscoverable in provider metadata (including UI selection and fallback flows when dynamic model fetch is unavailable), even though the codebase still treats them as valid model names elsewhere.
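A sketch of the restoration being asked for, assuming both names are still valid on the backing workspaces (the entries are copied from the pre-PR constant):

pub const DATABRICKS_KNOWN_MODELS: &[&str] = &[
    "databricks-claude-sonnet-4-5",
    "databricks-claude-3-7-sonnet", // restored
    "databricks-meta-llama-3-3-70b-instruct",
    "databricks-meta-llama-3-1-405b-instruct",
    "databricks-dbrx-instruct", // restored
];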

"databricks-dbrx-instruct",
];

 pub const DATABRICKS_DOC_URL: &str =
27 changes: 10 additions & 17 deletions crates/goose/src/providers/formats/databricks.rs
@@ -545,14 +545,10 @@ pub fn create_request(
 }

 let model_name = model_config.model_name.to_string();
-let is_o1 = model_name.starts_with("o1") || model_name.starts_with("goose-o1");
-let is_o3 = model_name.starts_with("o3") || model_name.starts_with("goose-o3");
-let is_gpt_5 = model_name.starts_with("gpt-5") || model_name.starts_with("goose-gpt-5");
-let is_openai_reasoning_model = is_o1 || is_o3 || is_gpt_5;
+let is_openai_reasoning_model = model_config.is_openai_reasoning_model();
 let is_claude_sonnet =
     model_name.contains("claude-3-7-sonnet") || model_name.contains("claude-4-sonnet"); // can be goose- or databricks-

-// Only extract reasoning effort for O1/O3 models
 let (model_name, reasoning_effort) = if is_openai_reasoning_model {
     let parts: Vec<&str> = model_config.model_name.split('-').collect();
     let last_part = parts.last().unwrap();
@@ -568,7 +564,6 @@
         ),
     }
 } else {
-    // For non-O family models, use the model name as is and no reasoning effort
     (model_config.model_name.to_string(), None)
 };

@@ -650,16 +645,10 @@
     }
 }

-// OpenAI reasoning models use max_completion_tokens instead of max_tokens
-let key = if is_openai_reasoning_model {
-    "max_completion_tokens"
-} else {
-    "max_tokens"
-};
-payload
-    .as_object_mut()
-    .unwrap()
-    .insert(key.to_string(), json!(model_config.max_output_tokens()));
+payload.as_object_mut().unwrap().insert(
+    "max_completion_tokens".to_string(),
+    json!(model_config.max_output_tokens()),
P1: Preserve max_tokens for non-reasoning Databricks requests (on lines +648 to +650)

create_request now always emits max_completion_tokens in the normal (non-CLAUDE_THINKING_ENABLED) path, so non-reasoning Databricks models no longer receive max_tokens. This path is used for standard Databricks chat invocations (for example, llama and claude endpoints), and switching the parameter can cause provider-side 400s or ignored output limits on backends that still implement chat-completions semantics with max_tokens. Keep max_completion_tokens only for reasoning-model payloads.
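A minimal sketch of the conditional this comment asks for; it is essentially the pre-PR code, reusing the is_openai_reasoning_model flag already computed above:

let key = if is_openai_reasoning_model {
    "max_completion_tokens" // o-series / gpt-5 reasoning payloads
} else {
    "max_tokens" // classic chat-completions semantics
};
payload
    .as_object_mut()
    .unwrap()
    .insert(key.to_string(), json!(model_config.max_output_tokens()));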

+);
 }

 // Apply cache control for Claude models to enable prompt caching

@@ -1056,6 +1045,7 @@ mod tests {
 toolshim_model: None,
 fast_model_config: None,
 request_params: None,
+reasoning: None,
 };
 let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
 let obj = request.as_object().unwrap();
@@ -1067,7 +1057,7 @@
"content": "system"
}
],
"max_tokens": 1024
"max_completion_tokens": 1024
});

for (key, value) in expected.as_object().unwrap() {
@@ -1088,6 +1078,7 @@
 toolshim_model: None,
 fast_model_config: None,
 request_params: None,
+reasoning: None,
 };
 let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
 assert_eq!(request["reasoning_effort"], "high");
@@ -1440,6 +1431,7 @@
 toolshim_model: None,
 fast_model_config: None,
 request_params: None,
+reasoning: None,
 };

 let messages = vec![
@@ -1492,6 +1484,7 @@
 toolshim_model: None,
 fast_model_config: None,
 request_params: None,
+reasoning: None,
 };

 let messages = vec![Message::user().with_text("Hello")];
35 changes: 13 additions & 22 deletions crates/goose/src/providers/formats/openai.rs
@@ -769,14 +769,9 @@ pub fn create_request(
     ));
 }

-let is_ox_model = model_config.model_name.starts_with("o1")
-    || model_config.model_name.starts_with("o2")
-    || model_config.model_name.starts_with("o3")
-    || model_config.model_name.starts_with("o4")
-    || model_config.model_name.starts_with("gpt-5");
-
-// Only extract reasoning effort for O-series models
-let (model_name, reasoning_effort) = if is_ox_model {
+let is_reasoning_model = model_config.is_openai_reasoning_model();
+
+let (model_name, reasoning_effort) = if is_reasoning_model {
     let parts: Vec<&str> = model_config.model_name.split('-').collect();
     let last_part = parts.last().unwrap();

@@ -791,12 +786,11 @@
         ),
     }
 } else {
-    // For non-O family models, use the model name as is and no reasoning effort
     (model_config.model_name.to_string(), None)
 };

 let system_message = json!({
-    "role": if is_ox_model { "developer" } else { "system" },
+    "role": if is_reasoning_model { "developer" } else { "system" },
     "content": system
 });

@@ -822,22 +816,16 @@
 }

 // o1, o3 models currently don't support temperature
-if !is_ox_model {
+if !is_reasoning_model {
     if let Some(temp) = model_config.temperature {
         payload["temperature"] = json!(temp);
     }
 }

-// o1/o3 models use max_completion_tokens instead of max_tokens
-let key = if is_ox_model {
-    "max_completion_tokens"
-} else {
-    "max_tokens"
-};
-payload
-    .as_object_mut()
-    .unwrap()
-    .insert(key.to_string(), json!(model_config.max_output_tokens()));
+payload.as_object_mut().unwrap().insert(
+    "max_completion_tokens".to_string(),
+    json!(model_config.max_output_tokens()),
P1: Restore max_tokens for non-reasoning chat completions (on lines +825 to +827)

This now unconditionally writes max_completion_tokens, but this formatter is shared by multiple non-OpenAI providers (for example openai_compatible, ollama, tetrate, githubcopilot, litellm, and openrouter) that still post to /chat/completions schemas expecting max_tokens. With this change, non-reasoning requests no longer include max_tokens at all, which can cause 400s or silently dropped output limits on those backends. Keep max_completion_tokens for reasoning models, but preserve a max_tokens path for standard chat-completions models.
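A rough sketch of one way to do that while also honoring the new canonical reasoning flag added in this PR (the Some-wins-over-heuristic precedence is an assumption, not something this diff establishes):

// Prefer the canonical reasoning flag when set; fall back to the name heuristic.
let use_completion_tokens = model_config.reasoning.unwrap_or(is_reasoning_model);
let key = if use_completion_tokens {
    "max_completion_tokens"
} else {
    "max_tokens"
};
payload
    .as_object_mut()
    .unwrap()
    .insert(key.to_string(), json!(model_config.max_output_tokens()));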

+);

 if for_streaming {
     payload["stream"] = json!(true);

@@ -1500,6 +1488,7 @@
 toolshim_model: None,
 fast_model_config: None,
 request_params: None,
+reasoning: None,
 };
 let request = create_request(
     &model_config,
@@ -1518,7 +1507,7 @@
"content": "system"
}
],
"max_tokens": 1024
"max_completion_tokens": 1024
});

for (key, value) in expected.as_object().unwrap() {
@@ -1540,6 +1529,7 @@
 toolshim_model: None,
 fast_model_config: None,
 request_params: None,
+reasoning: None,
 };
 let request = create_request(
     &model_config,
@@ -1581,6 +1571,7 @@
 toolshim_model: None,
 fast_model_config: None,
 request_params: None,
+reasoning: None,
 };
 let request = create_request(
     &model_config,
1 change: 1 addition & 0 deletions crates/goose/src/providers/formats/openai_responses.rs
@@ -827,6 +827,7 @@
 toolshim_model: None,
 fast_model_config: None,
 request_params: None,
+reasoning: None,
 };

 let messages = vec![
4 changes: 4 additions & 0 deletions ui/desktop/openapi.json
@@ -5826,6 +5826,10 @@
"model_name": {
"type": "string"
},
"reasoning": {
"type": "boolean",
"nullable": true
},
"request_params": {
"type": "object",
"description": "Provider-specific request parameters (e.g., anthropic_beta headers)",
1 change: 1 addition & 0 deletions ui/desktop/src/api/types.gen.ts
@@ -689,6 +689,7 @@ export type ModelConfig = {
 context_limit?: number | null;
 max_tokens?: number | null;
 model_name: string;
+reasoning?: boolean | null;
 /**
  * Provider-specific request parameters (e.g., anthropic_beta headers)
  */
@@ -231,7 +231,11 @@ export const SwitchModelModal = ({
 if (claudeThinkingType === 'adaptive') {
   upsert('CLAUDE_THINKING_EFFORT', claudeThinkingEffort, false).catch(console.warn);
 } else if (claudeThinkingType === 'enabled') {
-  upsert('CLAUDE_THINKING_BUDGET', parseInt(claudeThinkingBudget, 10) || 16000, false).catch(console.warn);
+  upsert(
+    'CLAUDE_THINKING_BUDGET',
+    parseInt(claudeThinkingBudget, 10) || 16000,
+    false
+  ).catch(console.warn);
 }
 }
