block · angiejones · Sep 10, 2025 · Sep 10, 2025 · Sep 10, 2025 · Sep 10, 2025
diff --git a/.github/workflows/validate-recipe-pr.yml b/.github/workflows/validate-recipe-pr.yml
@@ -40,7 +40,7 @@ jobs:
           mkdir -p ~/.config/goose
           cat <<EOF > ~/.config/goose/config.yaml
           GOOSE_PROVIDER: openrouter
-          GOOSE_MODEL: "anthropic/claude-3.5-sonnet"
+          GOOSE_MODEL: "anthropic/claude-sonnet-4"
           keyring: false
           EOF
 

diff --git a/BUILDING_DOCKER.md b/BUILDING_DOCKER.md
@@ -138,7 +138,7 @@ docker-compose run --rm goose session
 The Docker image accepts all standard Goose environment variables:
 
 - `GOOSE_PROVIDER`: LLM provider (openai, anthropic, google, etc.)
-- `GOOSE_MODEL`: Model to use (gpt-4o, claude-3-5-sonnet, etc.)
+- `GOOSE_MODEL`: Model to use (gpt-4o, claude-sonnet-4, etc.)
 - Provider-specific API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY, etc.)
 
 ### Persistent Configuration

diff --git a/crates/goose-bench/README.md b/crates/goose-bench/README.md
@@ -61,7 +61,7 @@ Benchmark configuration is provided through a JSON file. Here's a sample configu
     },
     {
       "provider": "databricks",
-      "name": "claude-3-5-sonnet",
+      "name": "claude-sonnet-4",
       "parallel_safe": true,
       "tool_shim": null
     },
@@ -232,7 +232,7 @@ Example of creating a config to re-run failed evaluations:
   "models": [
     {
       "provider": "databricks",
-      "name": "claude-3-5-sonnet",
+      "name": "claude-sonnet-4",
       "parallel_safe": false
     }
   ],

diff --git a/crates/goose-bench/src/bench_config.rs b/crates/goose-bench/src/bench_config.rs
@@ -51,7 +51,7 @@ impl Default for BenchRunConfig {
                 },
                 BenchModel {
                     provider: "databricks".to_string(),
-                    name: "goose-claude-3-5-sonnet".to_string(),
+                    name: "goose-claude-4-sonnet".to_string(),
                     parallel_safe: true,
                     tool_shim: None,
                 },

diff --git a/crates/goose-cli/src/cli.rs b/crates/goose-cli/src/cli.rs
@@ -610,7 +610,7 @@ enum Command {
         #[arg(
             long = "model",
             value_name = "MODEL",
-            help = "Specify the model to use (e.g., 'gpt-4o', 'claude-3.5-sonnet')",
+            help = "Specify the model to use (e.g., 'gpt-4o', 'claude-sonnet-4-20250514')",
             long_help = "Override the GOOSE_MODEL environment variable for this run. The model must be supported by the specified provider."
         )]
         model: Option<String>,

diff --git a/crates/goose-cli/src/scenario_tests/provider_configs.rs b/crates/goose-cli/src/scenario_tests/provider_configs.rs
@@ -44,7 +44,7 @@ impl ProviderConfig {
 static PROVIDER_CONFIGS: LazyLock<Vec<ProviderConfig>> = LazyLock::new(|| {
     vec![
         ProviderConfig::simple("openai", "gpt-4o"),
-        ProviderConfig::simple("anthropic", "claude-3-5-sonnet-20241022"),
+        ProviderConfig::simple("anthropic", "claude-sonnet-4-20250514"),
         ProviderConfig {
             name: "azure_openai",
             model_name: "gpt-4o",
@@ -58,7 +58,7 @@ static PROVIDER_CONFIGS: LazyLock<Vec<ProviderConfig>> = LazyLock::new(|| {
         },
         ProviderConfig {
             name: "aws_bedrock",
-            model_name: "anthropic.claude-3-5-sonnet-20241022-v2:0",
+            model_name: "anthropic.claude-sonnet-4-20250514-v1:0",
             required_env_vars: &["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
             env_modifications: None,
             skip_reason: Some("No valid keys around"),
@@ -67,12 +67,12 @@ static PROVIDER_CONFIGS: LazyLock<Vec<ProviderConfig>> = LazyLock::new(|| {
         ProviderConfig::simple("groq", "llama-3.3-70b-versatile"),
         ProviderConfig::simple_skip(
             "openrouter",
-            "anthropic/claude-3.5-sonnet",
+            "anthropic/claude-sonnet-4",
             Some("Key is no longer valid"),
         ),
         ProviderConfig::simple_skip(
             "claude-code",
-            "claude-3-5-sonnet",
+            "claude-sonnet-4-20250514",
             Some("No keys available"),
         ),
         ProviderConfig::simple_skip("cursor-agent", "gpt-5", Some("No keys available")),

diff --git a/crates/goose-cli/src/scenario_tests/recordings/anthropic/image_analysis.json b/crates/goose-cli/src/scenario_tests/recordings/anthropic/image_analysis.json
@@ -1,7 +1,7 @@
 {
   "c848f22f273e158c32435d3e72cc999c046dc1a9afdc3efda68ff451f833a185": {
     "input": {
-      "system": "You are a general-purpose AI agent called Goose, created by Block, the parent company of Square, CashApp, and Tidal. Goose is being developed as an open-source software project.\n\nThe current date is 2025-07-28 12:04:16.\n\nGoose uses LLM providers with tool calling capability. You can be used with different language models (gpt-4o, claude-3.5-sonnet, o1, llama-3.2, deepseek-r1, etc).\nThese models have varying knowledge cut-off dates depending on when they were trained, but typically it's between 5-10 months prior to the current date.\n\n# Extensions\n\nExtensions allow other applications to provide context to Goose. Extensions connect Goose to different data sources and tools.\nYou are capable of dynamically plugging into new extensions and learning how to use them. You solve higher level problems using the tools in these extensions, and can interact with multiple at once.\nUse the search_available_extensions tool to find additional extensions to enable to help with your task. To enable extensions, use the enable_extension tool and provide the extension_name. You should only enable extensions found from the search_available_extensions tool.\n\n\nBecause you dynamically load extensions, your conversation history may refer\nto interactions with extensions that are not currently active. The currently\nactive extensions are below. Each of these extensions provides tools that are\nin your tool specification.\n\n\n## weather_extension\n\n\n\n\n\n\n\n# Suggestion\n\"\"\n\n\n\n\n# Response Guidelines\n\n- Use Markdown formatting for all responses.\n- Follow best practices for Markdown, including:\n  - Using headers for organization.\n  - Bullet points for lists.\n  - Links formatted correctly, either as linked text (e.g., [this is linked text](https://example.com)) or automatic links using angle brackets (e.g., <http://example.com/>).\n- For code examples, use fenced code blocks by placing triple backticks (` ``` `) before and after the code. 
Include the language identifier after the opening backticks (e.g., ` ```python `) to enable syntax highlighting.\n- Ensure clarity, conciseness, and proper formatting to enhance readability and usability.\n\n# Additional Instructions:\n\nRight now you are *NOT* in the chat only mode and have access to tool use and system.",
+      "system": "You are a general-purpose AI agent called Goose, created by Block, the parent company of Square, CashApp, and Tidal. Goose is being developed as an open-source software project.\n\nThe current date is 2025-07-28 12:04:16.\n\nGoose uses LLM providers with tool calling capability. You can be used with different language models (gpt-4o, claude-sonnet-4, o1, llama-3.2, deepseek-r1, etc).\nThese models have varying knowledge cut-off dates depending on when they were trained, but typically it's between 5-10 months prior to the current date.\n\n# Extensions\n\nExtensions allow other applications to provide context to Goose. Extensions connect Goose to different data sources and tools.\nYou are capable of dynamically plugging into new extensions and learning how to use them. You solve higher level problems using the tools in these extensions, and can interact with multiple at once.\nUse the search_available_extensions tool to find additional extensions to enable to help with your task. To enable extensions, use the enable_extension tool and provide the extension_name. You should only enable extensions found from the search_available_extensions tool.\n\n\nBecause you dynamically load extensions, your conversation history may refer\nto interactions with extensions that are not currently active. The currently\nactive extensions are below. Each of these extensions provides tools that are\nin your tool specification.\n\n\n## weather_extension\n\n\n\n\n\n\n\n# Suggestion\n\"\"\n\n\n\n\n# Response Guidelines\n\n- Use Markdown formatting for all responses.\n- Follow best practices for Markdown, including:\n  - Using headers for organization.\n  - Bullet points for lists.\n  - Links formatted correctly, either as linked text (e.g., [this is linked text](https://example.com)) or automatic links using angle brackets (e.g., <http://example.com/>).\n- For code examples, use fenced code blocks by placing triple backticks (` ``` `) before and after the code. 
Include the language identifier after the opening backticks (e.g., ` ```python `) to enable syntax highlighting.\n- Ensure clarity, conciseness, and proper formatting to enhance readability and usability.\n\n# Additional Instructions:\n\nRight now you are *NOT* in the chat only mode and have access to tool use and system.",
       "messages": [
         {
           "id": null,
@@ -242,7 +242,7 @@
         ]
       },
       "usage": {
-        "model": "claude-3-5-sonnet-20241022",
+        "model": "claude-sonnet-4-20250514",
         "usage": {
           "input_tokens": 2560,
           "output_tokens": 111,
@@ -253,7 +253,7 @@
   },
   "78cc474ff2d51b9a24df8c35e5c75f256dafb67ff5489af30fcec95cd87790b8": {
     "input": {
-      "system": "You are a general-purpose AI agent called Goose, created by Block, the parent company of Square, CashApp, and Tidal. Goose is being developed as an open-source software project.\n\nThe current date is 2025-07-28 12:04:16.\n\nGoose uses LLM providers with tool calling capability. You can be used with different language models (gpt-4o, claude-3.5-sonnet, o1, llama-3.2, deepseek-r1, etc).\nThese models have varying knowledge cut-off dates depending on when they were trained, but typically it's between 5-10 months prior to the current date.\n\n# Extensions\n\nExtensions allow other applications to provide context to Goose. Extensions connect Goose to different data sources and tools.\nYou are capable of dynamically plugging into new extensions and learning how to use them. You solve higher level problems using the tools in these extensions, and can interact with multiple at once.\nUse the search_available_extensions tool to find additional extensions to enable to help with your task. To enable extensions, use the enable_extension tool and provide the extension_name. You should only enable extensions found from the search_available_extensions tool.\n\n\nBecause you dynamically load extensions, your conversation history may refer\nto interactions with extensions that are not currently active. The currently\nactive extensions are below. Each of these extensions provides tools that are\nin your tool specification.\n\n\n## weather_extension\n\n\n\n\n\n\n\n# Suggestion\n\"\"\n\n\n\n\n# Response Guidelines\n\n- Use Markdown formatting for all responses.\n- Follow best practices for Markdown, including:\n  - Using headers for organization.\n  - Bullet points for lists.\n  - Links formatted correctly, either as linked text (e.g., [this is linked text](https://example.com)) or automatic links using angle brackets (e.g., <http://example.com/>).\n- For code examples, use fenced code blocks by placing triple backticks (` ``` `) before and after the code. 
Include the language identifier after the opening backticks (e.g., ` ```python `) to enable syntax highlighting.\n- Ensure clarity, conciseness, and proper formatting to enhance readability and usability.\n\n# Additional Instructions:\n\nRight now you are *NOT* in the chat only mode and have access to tool use and system.",
+      "system": "You are a general-purpose AI agent called Goose, created by Block, the parent company of Square, CashApp, and Tidal. Goose is being developed as an open-source software project.\n\nThe current date is 2025-07-28 12:04:16.\n\nGoose uses LLM providers with tool calling capability. You can be used with different language models (gpt-4o, claude-sonnet-4, o1, llama-3.2, deepseek-r1, etc).\nThese models have varying knowledge cut-off dates depending on when they were trained, but typically it's between 5-10 months prior to the current date.\n\n# Extensions\n\nExtensions allow other applications to provide context to Goose. Extensions connect Goose to different data sources and tools.\nYou are capable of dynamically plugging into new extensions and learning how to use them. You solve higher level problems using the tools in these extensions, and can interact with multiple at once.\nUse the search_available_extensions tool to find additional extensions to enable to help with your task. To enable extensions, use the enable_extension tool and provide the extension_name. You should only enable extensions found from the search_available_extensions tool.\n\n\nBecause you dynamically load extensions, your conversation history may refer\nto interactions with extensions that are not currently active. The currently\nactive extensions are below. Each of these extensions provides tools that are\nin your tool specification.\n\n\n## weather_extension\n\n\n\n\n\n\n\n# Suggestion\n\"\"\n\n\n\n\n# Response Guidelines\n\n- Use Markdown formatting for all responses.\n- Follow best practices for Markdown, including:\n  - Using headers for organization.\n  - Bullet points for lists.\n  - Links formatted correctly, either as linked text (e.g., [this is linked text](https://example.com)) or automatic links using angle brackets (e.g., <http://example.com/>).\n- For code examples, use fenced code blocks by placing triple backticks (` ``` `) before and after the code. 
Include the language identifier after the opening backticks (e.g., ` ```python `) to enable syntax highlighting.\n- Ensure clarity, conciseness, and proper formatting to enhance readability and usability.\n\n# Additional Instructions:\n\nRight now you are *NOT* in the chat only mode and have access to tool use and system.",
       "messages": [
         {
           "id": null,
@@ -525,7 +525,7 @@
         ]
       },
       "usage": {
-        "model": "claude-3-5-sonnet-20241022",
+        "model": "claude-sonnet-4-20250514",
         "usage": {
           "input_tokens": 3053,
           "output_tokens": 82,