aaif-goose · DOsinga · Feb 24, 2026 · Feb 24, 2026 · Feb 24, 2026 · Feb 24, 2026
diff --git a/crates/goose/src/agents/platform_extensions/apps.rs b/crates/goose/src/agents/platform_extensions/apps.rs
@@ -290,8 +290,7 @@ impl AppsManagerClient {
         let messages = vec![Message::user().with_text(&user_prompt)];
         let tools = vec![Self::create_app_content_tool()];
 
-        let mut model_config = provider.get_model_config();
-        model_config.max_tokens = Some(16384);
+        let model_config = provider.get_model_config();
 
         let (response, _usage) = provider
             .complete(&model_config, session_id, &system_prompt, &messages, &tools)
@@ -324,8 +323,7 @@ impl AppsManagerClient {
         let messages = vec![Message::user().with_text(&user_prompt)];
         let tools = vec![Self::update_app_content_tool()];
 
-        let mut model_config = provider.get_model_config();
-        model_config.max_tokens = Some(16384);
+        let model_config = provider.get_model_config();
 
         let (response, _usage) = provider
             .complete(&model_config, session_id, &system_prompt, &messages, &tools)

diff --git a/crates/goose/src/model.rs b/crates/goose/src/model.rs
@@ -280,8 +280,7 @@ impl ModelConfig {
             return tokens;
         }
 
-        // Priority 2: Global default
-        4_096
+        16_384
     }
 
     pub fn new_or_fail(model_name: &str) -> ModelConfig {

diff --git a/crates/goose/src/providers/formats/anthropic.rs b/crates/goose/src/providers/formats/anthropic.rs
@@ -431,10 +431,12 @@ pub fn create_request(
 
     let is_thinking_enabled = std::env::var("CLAUDE_THINKING_ENABLED").is_ok();
     if is_thinking_enabled {
-        let budget_tokens = std::env::var("CLAUDE_THINKING_BUDGET")
-            .unwrap_or_else(|_| "16000".to_string())
-            .parse()
-            .unwrap_or(16000);
+        // Anthropic requires budget_tokens >= 1024
+        const DEFAULT_THINKING_BUDGET: i32 = 16000;
+        let budget_tokens: i32 = std::env::var("CLAUDE_THINKING_BUDGET")
+            .ok()
+            .and_then(|s| s.parse().ok())
+            .unwrap_or(DEFAULT_THINKING_BUDGET);
 
         payload
             .as_object_mut()

diff --git a/crates/goose/src/providers/formats/databricks.rs b/crates/goose/src/providers/formats/databricks.rs
@@ -613,19 +613,19 @@ pub fn create_request(
 
     let is_thinking_enabled = std::env::var("CLAUDE_THINKING_ENABLED").is_ok();
     if is_claude_sonnet && is_thinking_enabled {
-        // Minimum budget_tokens is 1024
-        let budget_tokens = std::env::var("CLAUDE_THINKING_BUDGET")
-            .unwrap_or_else(|_| "16000".to_string())
-            .parse()
-            .unwrap_or(16000);
-
-        // For Claude models with thinking enabled, we need to add max_tokens + budget_tokens
-        // Default to 8192 (Claude max output) + budget if not specified
-        let max_completion_tokens = model_config.max_tokens.unwrap_or(8192);
-        payload.as_object_mut().unwrap().insert(
-            "max_tokens".to_string(),
-            json!(max_completion_tokens + budget_tokens),
-        );
+        // Anthropic requires budget_tokens >= 1024
+        const DEFAULT_THINKING_BUDGET: i32 = 16000;
+        let budget_tokens: i32 = std::env::var("CLAUDE_THINKING_BUDGET")
+            .ok()
+            .and_then(|s| s.parse().ok())
+            .unwrap_or(DEFAULT_THINKING_BUDGET);
-            .unwrap_or(DEFAULT_THINKING_BUDGET);
+            .unwrap_or(DEFAULT_THINKING_BUDGET)
+            .max(1024);
-            .unwrap_or(DEFAULT_THINKING_BUDGET);
+            .unwrap_or(DEFAULT_THINKING_BUDGET)
+            .max(1024);
+
+        // With thinking enabled, max_tokens must include both output and thinking budget
+        let max_tokens = model_config.max_output_tokens() + budget_tokens;
+        payload
+            .as_object_mut()
+            .unwrap()
+            .insert("max_tokens".to_string(), json!(max_tokens));
 
         payload.as_object_mut().unwrap().insert(
             "thinking".to_string(),
@@ -650,18 +650,16 @@ pub fn create_request(
             }
         }
 
-        // open ai reasoning models use max_completion_tokens instead of max_tokens
-        if let Some(tokens) = model_config.max_tokens {
-            let key = if is_openai_reasoning_model {
-                "max_completion_tokens"
-            } else {
-                "max_tokens"
-            };
-            payload
-                .as_object_mut()
-                .unwrap()
-                .insert(key.to_string(), json!(tokens));
-        }
+        // OpenAI reasoning models use max_completion_tokens instead of max_tokens
+        let key = if is_openai_reasoning_model {
+            "max_completion_tokens"
+        } else {
+            "max_tokens"
+        };
+        payload
+            .as_object_mut()
+            .unwrap()
+            .insert(key.to_string(), json!(model_config.max_output_tokens()));
     }
 
     // Apply cache control for Claude models to enable prompt caching

diff --git a/crates/goose/src/providers/formats/google.rs b/crates/goose/src/providers/formats/google.rs
@@ -592,18 +592,11 @@ pub fn create_request(
 
     let thinking_config = get_thinking_config(model_config);
 
-    let generation_config = if model_config.temperature.is_some()
-        || model_config.max_tokens.is_some()
-        || thinking_config.is_some()
-    {
-        Some(GenerationConfig {
-            temperature: model_config.temperature.map(|t| t as f64),
-            max_output_tokens: model_config.max_tokens,
-            thinking_config,
-        })
-    } else {
-        None
-    };
+    let generation_config = Some(GenerationConfig {
+        temperature: model_config.temperature.map(|t| t as f64),
+        max_output_tokens: Some(model_config.max_output_tokens()),
+        thinking_config,
+    });
 
     let request = GoogleRequest {
         system_instruction: SystemInstruction {

diff --git a/crates/goose/src/providers/formats/openai.rs b/crates/goose/src/providers/formats/openai.rs
@@ -828,18 +828,16 @@ pub fn create_request(
         }
     }
 
-    // o1 models use max_completion_tokens instead of max_tokens
-    if let Some(tokens) = model_config.max_tokens {
-        let key = if is_ox_model {
-            "max_completion_tokens"
-        } else {
-            "max_tokens"
-        };
-        payload
-            .as_object_mut()
-            .unwrap()
-            .insert(key.to_string(), json!(tokens));
-    }
+    // o1/o3 models use max_completion_tokens instead of max_tokens
+    let key = if is_ox_model {
+        "max_completion_tokens"
+    } else {
+        "max_tokens"
+    };
+    payload
+        .as_object_mut()
+        .unwrap()
+        .insert(key.to_string(), json!(model_config.max_output_tokens()));
 
     if for_streaming {
         payload["stream"] = json!(true);

diff --git a/crates/goose/src/providers/formats/openai_responses.rs b/crates/goose/src/providers/formats/openai_responses.rs
@@ -460,12 +460,10 @@ pub fn create_responses_request(
             .insert("temperature".to_string(), json!(temp));
     }
 
-    if let Some(tokens) = model_config.max_tokens {
-        payload
-            .as_object_mut()
-            .unwrap()
-            .insert("max_output_tokens".to_string(), json!(tokens));
-    }
+    payload.as_object_mut().unwrap().insert(
+        "max_output_tokens".to_string(),
+        json!(model_config.max_output_tokens()),
+    );
 
     Ok(payload)
 }

diff --git a/crates/goose/src/providers/formats/snowflake.rs b/crates/goose/src/providers/formats/snowflake.rs
@@ -353,11 +353,10 @@ pub fn create_request(
         format_tools(tools)
     };
 
-    let max_tokens = model_config.max_tokens.unwrap_or(4096);
     let mut payload = json!({
         "model": model_config.model_name,
         "messages": snowflake_messages,
-        "max_tokens": max_tokens,
+        "max_tokens": model_config.max_output_tokens(),
     });
 
     // Add tools if present and not a description request

diff --git a/crates/goose/src/providers/sagemaker_tgi.rs b/crates/goose/src/providers/sagemaker_tgi.rs
@@ -149,7 +149,7 @@ impl SageMakerTgiProvider {
         let request = json!({
             "inputs": prompt,
             "parameters": {
-                "max_new_tokens": self.model.max_tokens.unwrap_or(150),
+                "max_new_tokens": self.model.max_output_tokens(),
                 "temperature": self.model.temperature.unwrap_or(0.7),
                 "do_sample": true,
                 "return_full_text": false
-Original file line number
+Diff line change
@@ Expand Up / @@ -280,8 +280,7 @@ impl ModelConfig { @@
                 return tokens;
             }
-            // Priority 2: Global default
-            4_096
+            16_384
         }
         pub fn new_or_fail(model_name: &str) -> ModelConfig {
@@ Expand Down @@