From dfbb7e41064a65d133fba507841c57a52ec0f6cf Mon Sep 17 00:00:00 2001
From: Uddhav Kambli <uddhav@kambli.net>
Date: Sun, 20 Apr 2025 01:13:35 -0400
Subject: [PATCH] feat(gcpvertexai): do HTTP 429 like retries for Anthropic API
 HTTP 529 overloaded status code

---
 .../src/routes/providers_and_keys.json        |   2 +-
 .../src/providers/formats/gcpvertexai.rs      |   6 -
 crates/goose/src/providers/gcpvertexai.rs     | 286 +++++++++++++-----
 .../docs/getting-started/providers.md         |  24 +-
 .../settings/models/GooseModels.tsx           |   1 -
 .../settings/models/hardcoded_stuff.tsx       |   2 -
 6 files changed, 227 insertions(+), 94 deletions(-)

diff --git a/crates/goose-server/src/routes/providers_and_keys.json b/crates/goose-server/src/routes/providers_and_keys.json
index 830cf665b365..ff950d58b8cc 100644
--- a/crates/goose-server/src/routes/providers_and_keys.json
+++ b/crates/goose-server/src/routes/providers_and_keys.json
@@ -20,7 +20,7 @@
     "gcp_vertex_ai": {
         "name": "GCP Vertex AI",
         "description": "Use Vertex AI platform models",
-        "models": ["claude-3-5-haiku@20241022", "claude-3-5-sonnet@20240620", "claude-3-5-sonnet-v2@20241022", "claude-3-7-sonnet@20250219", "gemini-1.5-pro-002", "gemini-2.0-flash-001", "gemini-2.0-pro-exp-02-05", "gemini-2.5-pro-exp-03-25"],
+        "models": ["claude-3-5-haiku@20241022", "claude-3-5-sonnet@20240620", "claude-3-5-sonnet-v2@20241022", "claude-3-7-sonnet@20250219", "gemini-1.5-pro-002", "gemini-2.0-flash-001", "gemini-2.5-pro-exp-03-25"],
         "required_keys": ["GCP_PROJECT_ID", "GCP_LOCATION"]
     },
     "google": {
diff --git a/crates/goose/src/providers/formats/gcpvertexai.rs b/crates/goose/src/providers/formats/gcpvertexai.rs
index b4c57e56ba9c..8d0c1c94cb5d 100644
--- a/crates/goose/src/providers/formats/gcpvertexai.rs
+++ b/crates/goose/src/providers/formats/gcpvertexai.rs
@@ -92,8 +92,6 @@ pub enum GeminiVersion {
     Pro15,
     /// Gemini 2.0 Flash version
     Flash20,
-    /// Gemini 2.0 Pro Experimental version
-    Pro20Exp,
     /// Gemini 2.5 Pro Experimental version
     Pro25Exp,
     /// Generic Gemini model for custom or new versions
@@ -113,7 +111,6 @@ impl fmt::Display for GcpVertexAIModel {
             Self::Gemini(version) => match version {
                 GeminiVersion::Pro15 => "gemini-1.5-pro-002",
                 GeminiVersion::Flash20 => "gemini-2.0-flash-001",
-                GeminiVersion::Pro20Exp => "gemini-2.0-pro-exp-02-05",
                 GeminiVersion::Pro25Exp => "gemini-2.5-pro-exp-03-25",
                 GeminiVersion::Generic(name) => name,
             },
@@ -148,7 +145,6 @@ impl TryFrom<&str> for GcpVertexAIModel {
             "claude-3-5-haiku@20241022" => Ok(Self::Claude(ClaudeVersion::Haiku35)),
             "gemini-1.5-pro-002" => Ok(Self::Gemini(GeminiVersion::Pro15)),
             "gemini-2.0-flash-001" => Ok(Self::Gemini(GeminiVersion::Flash20)),
-            "gemini-2.0-pro-exp-02-05" => Ok(Self::Gemini(GeminiVersion::Pro20Exp)),
             "gemini-2.5-pro-exp-03-25" => Ok(Self::Gemini(GeminiVersion::Pro25Exp)),
             // Generic models based on prefix matching
             _ if s.starts_with("claude-") => {
@@ -342,7 +338,6 @@ mod tests {
             "claude-3-5-haiku@20241022",
             "gemini-1.5-pro-002",
             "gemini-2.0-flash-001",
-            "gemini-2.0-pro-exp-02-05",
             "gemini-2.5-pro-exp-03-25",
         ];
 
@@ -364,7 +359,6 @@ mod tests {
             ("claude-3-5-haiku@20241022", GcpLocation::Ohio),
             ("gemini-1.5-pro-002", GcpLocation::Iowa),
             ("gemini-2.0-flash-001", GcpLocation::Iowa),
-            ("gemini-2.0-pro-exp-02-05", GcpLocation::Iowa),
             ("gemini-2.5-pro-exp-03-25", GcpLocation::Iowa),
         ];
 
diff --git a/crates/goose/src/providers/gcpvertexai.rs b/crates/goose/src/providers/gcpvertexai.rs
index f0b6d6ef5b70..09d81c55370f 100644
--- a/crates/goose/src/providers/gcpvertexai.rs
+++ b/crates/goose/src/providers/gcpvertexai.rs
@@ -2,6 +2,7 @@ use std::time::Duration;
 
 use anyhow::Result;
 use async_trait::async_trait;
+use once_cell::sync::Lazy;
 use reqwest::{Client, StatusCode};
 use serde_json::Value;
 use tokio::time::sleep;
@@ -34,6 +35,9 @@ const DEFAULT_MAX_RETRIES: usize = 6;
 const DEFAULT_BACKOFF_MULTIPLIER: f64 = 2.0;
 /// Default maximum interval for retry (in milliseconds)
 const DEFAULT_MAX_RETRY_INTERVAL_MS: u64 = 320_000;
+/// Status code for Anthropic's API overloaded error (529)
+static STATUS_API_OVERLOADED: Lazy<StatusCode> =
+    Lazy::new(|| StatusCode::from_u16(529).expect("Valid status code 529 for API_OVERLOADED"));
 
 /// Represents errors specific to GCP Vertex AI operations.
 #[derive(Debug, thiserror::Error)]
@@ -50,8 +54,10 @@ enum GcpVertexAIError {
 /// Retry configuration for handling rate limit errors
 #[derive(Debug, Clone)]
 struct RetryConfig {
-    /// Maximum number of retry attempts
-    max_retries: usize,
+    /// Maximum number of retry attempts for 429 errors
+    max_rate_limit_retries: usize,
+    /// Maximum number of retry attempts for 529 errors
+    max_overloaded_retries: usize,
     /// Initial interval between retries in milliseconds
     initial_interval_ms: u64,
     /// Multiplier for backoff (exponential)
@@ -63,7 +69,8 @@ struct RetryConfig {
 impl Default for RetryConfig {
     fn default() -> Self {
         Self {
-            max_retries: DEFAULT_MAX_RETRIES,
+            max_rate_limit_retries: DEFAULT_MAX_RETRIES,
+            max_overloaded_retries: DEFAULT_MAX_RETRIES,
             initial_interval_ms: DEFAULT_INITIAL_RETRY_INTERVAL_MS,
             backoff_multiplier: DEFAULT_BACKOFF_MULTIPLIER,
             max_interval_ms: DEFAULT_MAX_RETRY_INTERVAL_MS,
@@ -92,6 +99,19 @@ impl RetryConfig {
 
         Duration::from_millis(jittered_delay_ms)
     }
+
+    /// Get max retries for a specific error type
+    #[allow(dead_code)] // Used in tests
+    fn max_retries_for_status(&self, status: StatusCode) -> usize {
+        if status == StatusCode::TOO_MANY_REQUESTS {
+            self.max_rate_limit_retries
+        } else if status == *STATUS_API_OVERLOADED {
+            self.max_overloaded_retries
+        } else {
+            // Default to rate limit retries for any other status code
+            self.max_rate_limit_retries
+        }
+    }
 }
 
 /// Provider implementation for Google Cloud Platform's Vertex AI service.
@@ -172,10 +192,32 @@ impl GcpVertexAIProvider {
 
     /// Loads retry configuration from environment variables or uses defaults.
     fn load_retry_config(config: &crate::config::Config) -> RetryConfig {
-        let max_retries = config
-            .get_param("GCP_MAX_RETRIES")
+        // Load max retries for 429 rate limit errors
+        let max_rate_limit_retries = config
+            .get_param("GCP_MAX_RATE_LIMIT_RETRIES")
+            .ok()
+            .and_then(|v: String| v.parse::<usize>().ok())
+            .or_else(|| {
+                // Fall back to generic GCP_MAX_RETRIES if specific one isn't set
+                config
+                    .get_param("GCP_MAX_RETRIES")
+                    .ok()
+                    .and_then(|v: String| v.parse::<usize>().ok())
+            })
+            .unwrap_or(DEFAULT_MAX_RETRIES);
+
+        // Load max retries for 529 API overloaded errors
+        let max_overloaded_retries = config
+            .get_param("GCP_MAX_OVERLOADED_RETRIES")
             .ok()
             .and_then(|v: String| v.parse::<usize>().ok())
+            .or_else(|| {
+                // Fall back to generic GCP_MAX_RETRIES if specific one isn't set
+                config
+                    .get_param("GCP_MAX_RETRIES")
+                    .ok()
+                    .and_then(|v: String| v.parse::<usize>().ok())
+            })
             .unwrap_or(DEFAULT_MAX_RETRIES);
 
         let initial_interval_ms = config
@@ -197,7 +239,8 @@ impl GcpVertexAIProvider {
             .unwrap_or(DEFAULT_MAX_RETRY_INTERVAL_MS);
 
         RetryConfig {
-            max_retries,
+            max_rate_limit_retries,
+            max_overloaded_retries,
             initial_interval_ms,
             backoff_multiplier,
             max_interval_ms,
@@ -269,7 +312,7 @@ impl GcpVertexAIProvider {
     }
 
     /// Makes an authenticated POST request to the Vertex AI API at a specific location.
-    /// Includes retry logic for 429 Too Many Requests errors.
+    /// Includes retry logic for 429 (Too Many Requests) and 529 (API Overloaded) errors.
     ///
     /// # Arguments
     /// * `payload` - The request payload to send
@@ -285,21 +328,12 @@ impl GcpVertexAIProvider {
             .build_request_url(context.provider(), location)
             .map_err(|e| ProviderError::RequestFailed(e.to_string()))?;
 
-        // Initialize retry counter
-        let mut attempts = 0;
+        // Initialize separate counters for different error types
+        let mut rate_limit_attempts = 0;
+        let mut overloaded_attempts = 0;
         let mut last_error = None;
 
         loop {
-            // Check if we've exceeded max retries
-            if attempts > 0 && attempts > self.retry_config.max_retries {
-                let error_msg = format!(
-                    "Exceeded maximum retry attempts ({}) for rate limiting (429)",
-                    self.retry_config.max_retries
-                );
-                tracing::error!("{}", error_msg);
-                return Err(last_error.unwrap_or(ProviderError::RateLimitExceeded(error_msg)));
-            }
-
             // Get a fresh auth token for each attempt
             let auth_header = self
                 .get_auth_header()
@@ -318,60 +352,116 @@ impl GcpVertexAIProvider {
 
             let status = response.status();
 
-            // If not a 429, process normally
-            if status != StatusCode::TOO_MANY_REQUESTS {
-                let response_json = response.json::<Value>().await.map_err(|e| {
-                    ProviderError::RequestFailed(format!("Failed to parse response: {e}"))
-                })?;
-
-                return match status {
-                    StatusCode::OK => Ok(response_json),
-                    StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
-                        tracing::debug!(
-                            "Authentication failed. Status: {status}, Payload: {payload:?}"
+            // Handle 429 Too Many Requests and 529 API Overloaded errors
+            match status {
+                status if status == StatusCode::TOO_MANY_REQUESTS => {
+                    rate_limit_attempts += 1;
+
+                    if rate_limit_attempts > self.retry_config.max_rate_limit_retries {
+                        let error_msg = format!(
+                            "Exceeded maximum retry attempts ({}) for rate limiting (429) errors",
+                            self.retry_config.max_rate_limit_retries
+                        );
+                        tracing::error!("{}", error_msg);
+                        return Err(
+                            last_error.unwrap_or(ProviderError::RateLimitExceeded(error_msg))
                         );
-                        Err(ProviderError::Authentication(format!(
-                            "Authentication failed: {response_json:?}"
-                        )))
                     }
-                    _ => {
-                        tracing::debug!(
-                            "Request failed. Status: {status}, Response: {response_json:?}"
+
+                    // Try to parse response for more detailed error info
+                    let cite_gcp_vertex_429 =
+                        "See https://cloud.google.com/vertex-ai/generative-ai/docs/error-code-429";
+                    let response_text = response.text().await.unwrap_or_default();
+
+                    let error_message =
+                        if response_text.contains("Exceeded the Provisioned Throughput") {
+                            // Handle 429 rate limit due to throughput limits
+                            format!("Exceeded the Provisioned Throughput: {cite_gcp_vertex_429}")
+                        } else {
+                            // Handle generic 429 rate limit
+                            format!("Pay-as-you-go resource exhausted: {cite_gcp_vertex_429}")
+                        };
+
+                    tracing::warn!(
+                        "Rate limit exceeded error (429) (attempt {}/{}): {}. Retrying after backoff...",
+                        rate_limit_attempts,
+                        self.retry_config.max_rate_limit_retries,
+                        error_message
+                    );
+
+                    // Store the error in case we need to return it after max retries
+                    last_error = Some(ProviderError::RateLimitExceeded(error_message));
+
+                    // Calculate and apply the backoff delay
+                    let delay = self.retry_config.delay_for_attempt(rate_limit_attempts);
+                    tracing::info!("Backing off for {:?} before retry (rate limit 429)", delay);
+                    sleep(delay).await;
+                }
+                status if status == *STATUS_API_OVERLOADED => {
+                    overloaded_attempts += 1;
+
+                    if overloaded_attempts > self.retry_config.max_overloaded_retries {
+                        let error_msg = format!(
+                            "Exceeded maximum retry attempts ({}) for API overloaded (529) errors",
+                            self.retry_config.max_overloaded_retries
+                        );
+                        tracing::error!("{}", error_msg);
+                        return Err(
+                            last_error.unwrap_or(ProviderError::RateLimitExceeded(error_msg))
                         );
-                        Err(ProviderError::RequestFailed(format!(
-                            "Request failed with status {status}: {response_json:?}"
-                        )))
                     }
-                };
-            }
 
-            // Handle 429 Too Many Requests
-            attempts += 1;
-
-            // Try to parse response for more detailed error info
-            let cite_gcp_vertex_429 =
-                "See https://cloud.google.com/vertex-ai/generative-ai/docs/error-code-429";
-            let response_text = response.text().await.unwrap_or_default();
-            let quota_error = if response_text.contains("Exceeded the Provisioned Throughput") {
-                format!("Exceeded the Provisioned Throughput: {cite_gcp_vertex_429}.")
-            } else {
-                format!("Pay-as-you-go resource exhausted: {cite_gcp_vertex_429}.")
-            };
-
-            tracing::warn!(
-                "Rate limit exceeded (attempt {}/{}): {}. Retrying after backoff...",
-                attempts,
-                self.retry_config.max_retries,
-                quota_error
-            );
-
-            // Store the error in case we need to return it after max retries
-            last_error = Some(ProviderError::RateLimitExceeded(quota_error));
-
-            // Calculate and apply the backoff delay
-            let delay = self.retry_config.delay_for_attempt(attempts);
-            tracing::info!("Backing off for {:?} before retry", delay);
-            sleep(delay).await;
+                    // Handle 529 Overloaded error (https://docs.anthropic.com/en/api/errors)
+                    let error_message =
+                        "Vertex AI Provider API is temporarily overloaded. This is similar to a rate limit \
+                        error but indicates backend processing capacity issues."
+                            .to_string();
+
+                    tracing::warn!(
+                        "API overloaded error (529) (attempt {}/{}): {}. Retrying after backoff...",
+                        overloaded_attempts,
+                        self.retry_config.max_overloaded_retries,
+                        error_message
+                    );
+
+                    // Store the error in case we need to return it after max retries
+                    last_error = Some(ProviderError::RateLimitExceeded(error_message));
+
+                    // Calculate and apply the backoff delay
+                    let delay = self.retry_config.delay_for_attempt(overloaded_attempts);
+                    tracing::info!(
+                        "Backing off for {:?} before retry (API overloaded 529)",
+                        delay
+                    );
+                    sleep(delay).await;
+                }
+                // For any other status codes, process normally
+                _ => {
+                    let response_json = response.json::<Value>().await.map_err(|e| {
+                        ProviderError::RequestFailed(format!("Failed to parse response: {e}"))
+                    })?;
+
+                    return match status {
+                        StatusCode::OK => Ok(response_json),
+                        StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
+                            tracing::debug!(
+                                "Authentication failed. Status: {status}, Payload: {payload:?}"
+                            );
+                            Err(ProviderError::Authentication(format!(
+                                "Authentication failed: {response_json:?}"
+                            )))
+                        }
+                        _ => {
+                            tracing::debug!(
+                                "Request failed. Status: {status}, Response: {response_json:?}"
+                            );
+                            Err(ProviderError::RequestFailed(format!(
+                                "Request failed with status {status}: {response_json:?}"
+                            )))
+                        }
+                    };
+                }
+            }
         }
     }
 
@@ -432,7 +522,7 @@ impl Provider for GcpVertexAIProvider {
             GcpVertexAIModel::Claude(ClaudeVersion::Haiku35),
             GcpVertexAIModel::Gemini(GeminiVersion::Pro15),
             GcpVertexAIModel::Gemini(GeminiVersion::Flash20),
-            GcpVertexAIModel::Gemini(GeminiVersion::Pro20Exp),
+            GcpVertexAIModel::Gemini(GeminiVersion::Pro25Exp),
         ]
         .iter()
         .map(|model| model.to_string())
@@ -452,6 +542,18 @@ impl Provider for GcpVertexAIProvider {
             vec![
                 ConfigKey::new("GCP_PROJECT_ID", true, false, None),
                 ConfigKey::new("GCP_LOCATION", true, false, Some(Iowa.to_string().as_str())),
+                ConfigKey::new(
+                    "GCP_MAX_RATE_LIMIT_RETRIES",
+                    false,
+                    false,
+                    Some(&DEFAULT_MAX_RETRIES.to_string()),
+                ),
+                ConfigKey::new(
+                    "GCP_MAX_OVERLOADED_RETRIES",
+                    false,
+                    false,
+                    Some(&DEFAULT_MAX_RETRIES.to_string()),
+                ),
                 ConfigKey::new(
                     "GCP_MAX_RETRIES",
                     false,
@@ -521,11 +623,13 @@ impl Provider for GcpVertexAIProvider {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use reqwest::StatusCode;
 
     #[test]
     fn test_retry_config_delay_calculation() {
         let config = RetryConfig {
-            max_retries: 5,
+            max_rate_limit_retries: 5,
+            max_overloaded_retries: 5,
             initial_interval_ms: 1000,
             backoff_multiplier: 2.0,
             max_interval_ms: 32000,
@@ -548,6 +652,44 @@ mod tests {
         assert!(delay10.as_millis() <= 38400); // max_interval_ms * 1.2 (max jitter)
     }
 
+    #[test]
+    fn test_max_retries_for_status() {
+        let config = RetryConfig {
+            max_rate_limit_retries: 5,
+            max_overloaded_retries: 10,
+            initial_interval_ms: 1000,
+            backoff_multiplier: 2.0,
+            max_interval_ms: 32000,
+        };
+
+        // Check that we get the right max retries for each error type
+        assert_eq!(
+            config.max_retries_for_status(StatusCode::TOO_MANY_REQUESTS),
+            5
+        );
+        assert_eq!(config.max_retries_for_status(*STATUS_API_OVERLOADED), 10);
+
+        // For any other status code, we should get the rate limit retries
+        assert_eq!(config.max_retries_for_status(StatusCode::BAD_REQUEST), 5);
+    }
+
+    #[test]
+    fn test_status_overloaded_code() {
+        // Test that we correctly handle the 529 status code
+
+        // Verify the custom status code is created correctly
+        assert_eq!(STATUS_API_OVERLOADED.as_u16(), 529);
+
+        // 529 is unregistered, but any 5xx code is classified as a server error
+        assert!(STATUS_API_OVERLOADED.is_server_error());
+
+        // Should be different from TOO_MANY_REQUESTS (429)
+        assert_ne!(*STATUS_API_OVERLOADED, StatusCode::TOO_MANY_REQUESTS);
+
+        // Should be different from SERVICE_UNAVAILABLE (503)
+        assert_ne!(*STATUS_API_OVERLOADED, StatusCode::SERVICE_UNAVAILABLE);
+    }
+
     #[test]
     fn test_model_provider_conversion() {
         assert_eq!(ModelProvider::Anthropic.as_str(), "anthropic");
@@ -592,7 +734,7 @@ mod tests {
             .collect();
         assert!(model_names.contains(&"claude-3-5-sonnet-v2@20241022".to_string()));
         assert!(model_names.contains(&"gemini-1.5-pro-002".to_string()));
-        // Should contain the original 2 config keys plus 4 new retry-related ones
-        assert_eq!(metadata.config_keys.len(), 6);
+        // Should contain the original 2 config keys plus 6 new retry-related ones
+        assert_eq!(metadata.config_keys.len(), 8);
     }
 }
diff --git a/documentation/docs/getting-started/providers.md b/documentation/docs/getting-started/providers.md
index 189357c4a50a..9807ae30fa5b 100644
--- a/documentation/docs/getting-started/providers.md
+++ b/documentation/docs/getting-started/providers.md
@@ -17,18 +17,18 @@ Goose relies heavily on tool calling capabilities and currently works best with
 
 ## Available Providers
 
-| Provider                                                                    | Description                                                                                                                                                                                                               | Parameters                                                                                                                                                                          |
-|-----------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| [Amazon Bedrock](https://aws.amazon.com/bedrock/)                           | Offers a variety of foundation models, including Claude, Jurassic-2, and others. **AWS environment variables must be set in advance, not configured through `goose configure`**                                           | `AWS_PROFILE`, or `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION`, ...                                                                                                   |
-| [Anthropic](https://www.anthropic.com/)                                     | Offers Claude, an advanced AI model for natural language tasks.                                                                                                                                                           | `ANTHROPIC_API_KEY`, `ANTHROPIC_HOST` (optional)                                                                                                                                                                 |
-| [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/) | Access Azure-hosted OpenAI models, including GPT-4 and GPT-3.5. Supports both API key and Azure credential chain authentication.                                                                                          | `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_DEPLOYMENT_NAME`, `AZURE_OPENAI_API_KEY` (optional)                                                                                           |
-| [Databricks](https://www.databricks.com/)                                   | Unified data analytics and AI platform for building and deploying models.                                                                                                                                                 | `DATABRICKS_HOST`, `DATABRICKS_TOKEN`                                                                                                                                               |
-| [Gemini](https://ai.google.dev/gemini-api/docs)                             | Advanced LLMs by Google with multimodal capabilities (text, images).                                                                                                                                                      | `GOOGLE_API_KEY`                                                                                                                                                                    |
-| [GCP Vertex AI](https://cloud.google.com/vertex-ai)                         | Google Cloud's Vertex AI platform, supporting Gemini and Claude models. **Credentials must be configured in advance. Follow the instructions at https://cloud.google.com/vertex-ai/docs/authentication.**                 | `GCP_PROJECT_ID`, `GCP_LOCATION` and optional `GCP_MAX_RETRIES` (6), `GCP_INITIAL_RETRY_INTERVAL_MS` (5000), `GCP_BACKOFF_MULTIPLIER` (2.0), `GCP_MAX_RETRY_INTERVAL_MS` (320_000). |
-| [Groq](https://groq.com/)                                                   | High-performance inference hardware and tools for LLMs.                                                                                                                                                                   | `GROQ_API_KEY`                                                                                                                                                                      |
-| [Ollama](https://ollama.com/)                                               | Local model runner supporting Qwen, Llama, DeepSeek, and other open-source models. **Because this provider runs locally, you must first [download and run a model](/docs/getting-started/providers#local-llms-ollama).**  | `OLLAMA_HOST`                                                                                                                                                                       |
-| [OpenAI](https://platform.openai.com/api-keys)                              | Provides gpt-4o, o1, and other advanced language models. Also supports OpenAI-compatible endpoints (e.g., self-hosted LLaMA, vLLM, KServe). **o1-mini and o1-preview are not supported because Goose uses tool calling.** | `OPENAI_API_KEY`, `OPENAI_HOST` (optional), `OPENAI_ORGANIZATION` (optional), `OPENAI_PROJECT` (optional), `OPENAI_CUSTOM_HEADERS` (optional)                                       |
-| [OpenRouter](https://openrouter.ai/)                                        | API gateway for unified access to various models with features like rate-limiting management.                                                                                                                             | `OPENROUTER_API_KEY`                                                                                                                                                                |
+| Provider                                                                    | Description                                                                                                                                                                                                               | Parameters                                                                                                                                                                                                                       |
+|-----------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| [Amazon Bedrock](https://aws.amazon.com/bedrock/)                           | Offers a variety of foundation models, including Claude, Jurassic-2, and others. **AWS environment variables must be set in advance, not configured through `goose configure`**                                           | `AWS_PROFILE`, or `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION`, ...                                                                                                                                                |
+| [Anthropic](https://www.anthropic.com/)                                     | Offers Claude, an advanced AI model for natural language tasks.                                                                                                                                                           | `ANTHROPIC_API_KEY`, `ANTHROPIC_HOST` (optional)                                                                                                                                                                                 |
+| [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/) | Access Azure-hosted OpenAI models, including GPT-4 and GPT-3.5. Supports both API key and Azure credential chain authentication.                                                                                          | `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_DEPLOYMENT_NAME`, `AZURE_OPENAI_API_KEY` (optional)                                                                                                                                         |
+| [Databricks](https://www.databricks.com/)                                   | Unified data analytics and AI platform for building and deploying models.                                                                                                                                                 | `DATABRICKS_HOST`, `DATABRICKS_TOKEN`                                                                                                                                                                                            |
+| [Gemini](https://ai.google.dev/gemini-api/docs)                             | Advanced LLMs by Google with multimodal capabilities (text, images).                                                                                                                                                      | `GOOGLE_API_KEY`                                                                                                                                                                                                                 |
+| [GCP Vertex AI](https://cloud.google.com/vertex-ai)                         | Google Cloud's Vertex AI platform, supporting Gemini and Claude models. **Credentials must be configured in advance. Follow the instructions at https://cloud.google.com/vertex-ai/docs/authentication.**                 | `GCP_PROJECT_ID`, `GCP_LOCATION` and optionally `GCP_MAX_RATE_LIMIT_RETRIES` (6), `GCP_MAX_OVERLOADED_RETRIES` (6), `GCP_INITIAL_RETRY_INTERVAL_MS` (5000), `GCP_BACKOFF_MULTIPLIER` (2.0), `GCP_MAX_RETRY_INTERVAL_MS` (320_000). |
+| [Groq](https://groq.com/)                                                   | High-performance inference hardware and tools for LLMs.                                                                                                                                                                   | `GROQ_API_KEY`                                                                                                                                                                                                                   |
+| [Ollama](https://ollama.com/)                                               | Local model runner supporting Qwen, Llama, DeepSeek, and other open-source models. **Because this provider runs locally, you must first [download and run a model](/docs/getting-started/providers#local-llms-ollama).**  | `OLLAMA_HOST`                                                                                                                                                                                                                    |
+| [OpenAI](https://platform.openai.com/api-keys)                              | Provides gpt-4o, o1, and other advanced language models. Also supports OpenAI-compatible endpoints (e.g., self-hosted LLaMA, vLLM, KServe). **o1-mini and o1-preview are not supported because Goose uses tool calling.** | `OPENAI_API_KEY`, `OPENAI_HOST` (optional), `OPENAI_ORGANIZATION` (optional), `OPENAI_PROJECT` (optional), `OPENAI_CUSTOM_HEADERS` (optional)                                                                                    |
+| [OpenRouter](https://openrouter.ai/)                                        | API gateway for unified access to various models with features like rate-limiting management.                                                                                                                             | `OPENROUTER_API_KEY`                                                                                                                                                                                                             |
 
 
    
diff --git a/ui/desktop/src/components/settings/models/GooseModels.tsx b/ui/desktop/src/components/settings/models/GooseModels.tsx
index 2b73dc4d7f46..544917165f0a 100644
--- a/ui/desktop/src/components/settings/models/GooseModels.tsx
+++ b/ui/desktop/src/components/settings/models/GooseModels.tsx
@@ -25,7 +25,6 @@ export const gooseModels: Model[] = [
   { id: 22, name: 'claude-3-5-sonnet-v2@20241022', provider: 'GCP Vertex AI' },
   { id: 23, name: 'claude-3-5-sonnet@20240620', provider: 'GCP Vertex AI' },
   { id: 24, name: 'claude-3-5-haiku@20241022', provider: 'GCP Vertex AI' },
-  { id: 25, name: 'gemini-2.0-pro-exp-02-05', provider: 'GCP Vertex AI' },
   { id: 26, name: 'gemini-2.0-flash-001', provider: 'GCP Vertex AI' },
   { id: 27, name: 'gemini-1.5-pro-002', provider: 'GCP Vertex AI' },
   { id: 28, name: 'gemini-2.5-pro-exp-03-25', provider: 'GCP Vertex AI' },
diff --git a/ui/desktop/src/components/settings/models/hardcoded_stuff.tsx b/ui/desktop/src/components/settings/models/hardcoded_stuff.tsx
index d8168e6e87ca..8eef9b3d99bd 100644
--- a/ui/desktop/src/components/settings/models/hardcoded_stuff.tsx
+++ b/ui/desktop/src/components/settings/models/hardcoded_stuff.tsx
@@ -13,7 +13,6 @@ export const google_models = [
   'gemini-2.0-flash',
   'gemini-2.0-flash-lite-preview-02-05',
   'gemini-2.0-flash-thinking-exp-01-21',
-  'gemini-2.0-pro-exp-02-05',
   'gemini-2.5-pro-exp-03-25',
 ];
 
@@ -32,7 +31,6 @@ export const gcp_vertex_ai_models = [
   'claude-3-5-haiku@20241022',
   'gemini-1.5-pro-002',
   'gemini-2.0-flash-001',
-  'gemini-2.0-pro-exp-02-05',
   'gemini-2.5-pro-exp-03-25',
 ];