From dfbb7e41064a65d133fba507841c57a52ec0f6cf Mon Sep 17 00:00:00 2001 From: Uddhav Kambli Date: Sun, 20 Apr 2025 01:13:35 -0400 Subject: [PATCH] feat(gcpvertexai): do HTTP 429 like retries for Anthropic API HTTP 529 overloaded status code --- .../src/routes/providers_and_keys.json | 2 +- .../src/providers/formats/gcpvertexai.rs | 6 - crates/goose/src/providers/gcpvertexai.rs | 286 +++++++++++++----- .../docs/getting-started/providers.md | 24 +- .../settings/models/GooseModels.tsx | 1 - .../settings/models/hardcoded_stuff.tsx | 2 - 6 files changed, 227 insertions(+), 94 deletions(-) diff --git a/crates/goose-server/src/routes/providers_and_keys.json b/crates/goose-server/src/routes/providers_and_keys.json index 830cf665b365..ff950d58b8cc 100644 --- a/crates/goose-server/src/routes/providers_and_keys.json +++ b/crates/goose-server/src/routes/providers_and_keys.json @@ -20,7 +20,7 @@ "gcp_vertex_ai": { "name": "GCP Vertex AI", "description": "Use Vertex AI platform models", - "models": ["claude-3-5-haiku@20241022", "claude-3-5-sonnet@20240620", "claude-3-5-sonnet-v2@20241022", "claude-3-7-sonnet@20250219", "gemini-1.5-pro-002", "gemini-2.0-flash-001", "gemini-2.0-pro-exp-02-05", "gemini-2.5-pro-exp-03-25"], + "models": ["claude-3-5-haiku@20241022", "claude-3-5-sonnet@20240620", "claude-3-5-sonnet-v2@20241022", "claude-3-7-sonnet@20250219", "gemini-1.5-pro-002", "gemini-2.0-flash-001", "gemini-2.5-pro-exp-03-25"], "required_keys": ["GCP_PROJECT_ID", "GCP_LOCATION"] }, "google": { diff --git a/crates/goose/src/providers/formats/gcpvertexai.rs b/crates/goose/src/providers/formats/gcpvertexai.rs index b4c57e56ba9c..8d0c1c94cb5d 100644 --- a/crates/goose/src/providers/formats/gcpvertexai.rs +++ b/crates/goose/src/providers/formats/gcpvertexai.rs @@ -92,8 +92,6 @@ pub enum GeminiVersion { Pro15, /// Gemini 2.0 Flash version Flash20, - /// Gemini 2.0 Pro Experimental version - Pro20Exp, /// Gemini 2.5 Pro Experimental version Pro25Exp, /// Generic Gemini model for custom or 
new versions @@ -113,7 +111,6 @@ impl fmt::Display for GcpVertexAIModel { Self::Gemini(version) => match version { GeminiVersion::Pro15 => "gemini-1.5-pro-002", GeminiVersion::Flash20 => "gemini-2.0-flash-001", - GeminiVersion::Pro20Exp => "gemini-2.0-pro-exp-02-05", GeminiVersion::Pro25Exp => "gemini-2.5-pro-exp-03-25", GeminiVersion::Generic(name) => name, }, @@ -148,7 +145,6 @@ impl TryFrom<&str> for GcpVertexAIModel { "claude-3-5-haiku@20241022" => Ok(Self::Claude(ClaudeVersion::Haiku35)), "gemini-1.5-pro-002" => Ok(Self::Gemini(GeminiVersion::Pro15)), "gemini-2.0-flash-001" => Ok(Self::Gemini(GeminiVersion::Flash20)), - "gemini-2.0-pro-exp-02-05" => Ok(Self::Gemini(GeminiVersion::Pro20Exp)), "gemini-2.5-pro-exp-03-25" => Ok(Self::Gemini(GeminiVersion::Pro25Exp)), // Generic models based on prefix matching _ if s.starts_with("claude-") => { @@ -342,7 +338,6 @@ mod tests { "claude-3-5-haiku@20241022", "gemini-1.5-pro-002", "gemini-2.0-flash-001", - "gemini-2.0-pro-exp-02-05", "gemini-2.5-pro-exp-03-25", ]; @@ -364,7 +359,6 @@ mod tests { ("claude-3-5-haiku@20241022", GcpLocation::Ohio), ("gemini-1.5-pro-002", GcpLocation::Iowa), ("gemini-2.0-flash-001", GcpLocation::Iowa), - ("gemini-2.0-pro-exp-02-05", GcpLocation::Iowa), ("gemini-2.5-pro-exp-03-25", GcpLocation::Iowa), ]; diff --git a/crates/goose/src/providers/gcpvertexai.rs b/crates/goose/src/providers/gcpvertexai.rs index f0b6d6ef5b70..09d81c55370f 100644 --- a/crates/goose/src/providers/gcpvertexai.rs +++ b/crates/goose/src/providers/gcpvertexai.rs @@ -2,6 +2,7 @@ use std::time::Duration; use anyhow::Result; use async_trait::async_trait; +use once_cell::sync::Lazy; use reqwest::{Client, StatusCode}; use serde_json::Value; use tokio::time::sleep; @@ -34,6 +35,9 @@ const DEFAULT_MAX_RETRIES: usize = 6; const DEFAULT_BACKOFF_MULTIPLIER: f64 = 2.0; /// Default maximum interval for retry (in milliseconds) const DEFAULT_MAX_RETRY_INTERVAL_MS: u64 = 320_000; +/// Status code for Anthropic's API overloaded error 
(529) +static STATUS_API_OVERLOADED: Lazy = + Lazy::new(|| StatusCode::from_u16(529).expect("Valid status code 529 for API_OVERLOADED")); /// Represents errors specific to GCP Vertex AI operations. #[derive(Debug, thiserror::Error)] @@ -50,8 +54,10 @@ enum GcpVertexAIError { /// Retry configuration for handling rate limit errors #[derive(Debug, Clone)] struct RetryConfig { - /// Maximum number of retry attempts - max_retries: usize, + /// Maximum number of retry attempts for 429 errors + max_rate_limit_retries: usize, + /// Maximum number of retry attempts for 529 errors + max_overloaded_retries: usize, /// Initial interval between retries in milliseconds initial_interval_ms: u64, /// Multiplier for backoff (exponential) @@ -63,7 +69,8 @@ struct RetryConfig { impl Default for RetryConfig { fn default() -> Self { Self { - max_retries: DEFAULT_MAX_RETRIES, + max_rate_limit_retries: DEFAULT_MAX_RETRIES, + max_overloaded_retries: DEFAULT_MAX_RETRIES, initial_interval_ms: DEFAULT_INITIAL_RETRY_INTERVAL_MS, backoff_multiplier: DEFAULT_BACKOFF_MULTIPLIER, max_interval_ms: DEFAULT_MAX_RETRY_INTERVAL_MS, @@ -92,6 +99,19 @@ impl RetryConfig { Duration::from_millis(jittered_delay_ms) } + + /// Get max retries for a specific error type + #[allow(dead_code)] // Used in tests + fn max_retries_for_status(&self, status: StatusCode) -> usize { + if status == StatusCode::TOO_MANY_REQUESTS { + self.max_rate_limit_retries + } else if status == *STATUS_API_OVERLOADED { + self.max_overloaded_retries + } else { + // Default to rate limit retries for any other status code + self.max_rate_limit_retries + } + } } /// Provider implementation for Google Cloud Platform's Vertex AI service. @@ -172,10 +192,32 @@ impl GcpVertexAIProvider { /// Loads retry configuration from environment variables or uses defaults. 
fn load_retry_config(config: &crate::config::Config) -> RetryConfig { - let max_retries = config - .get_param("GCP_MAX_RETRIES") + // Load max retries for 429 rate limit errors + let max_rate_limit_retries = config + .get_param("GCP_MAX_RATE_LIMIT_RETRIES") + .ok() + .and_then(|v: String| v.parse::().ok()) + .or_else(|| { + // Fall back to generic GCP_MAX_RETRIES if specific one isn't set + config + .get_param("GCP_MAX_RETRIES") + .ok() + .and_then(|v: String| v.parse::().ok()) + }) + .unwrap_or(DEFAULT_MAX_RETRIES); + + // Load max retries for 529 API overloaded errors + let max_overloaded_retries = config + .get_param("GCP_MAX_OVERLOADED_RETRIES") .ok() .and_then(|v: String| v.parse::().ok()) + .or_else(|| { + // Fall back to generic GCP_MAX_RETRIES if specific one isn't set + config + .get_param("GCP_MAX_RETRIES") + .ok() + .and_then(|v: String| v.parse::().ok()) + }) .unwrap_or(DEFAULT_MAX_RETRIES); let initial_interval_ms = config @@ -197,7 +239,8 @@ impl GcpVertexAIProvider { .unwrap_or(DEFAULT_MAX_RETRY_INTERVAL_MS); RetryConfig { - max_retries, + max_rate_limit_retries, + max_overloaded_retries, initial_interval_ms, backoff_multiplier, max_interval_ms, @@ -269,7 +312,7 @@ impl GcpVertexAIProvider { } /// Makes an authenticated POST request to the Vertex AI API at a specific location. - /// Includes retry logic for 429 Too Many Requests errors. + /// Includes retry logic for 429 (Too Many Requests) and 529 (API Overloaded) errors. 
/// /// # Arguments /// * `payload` - The request payload to send @@ -285,21 +328,12 @@ impl GcpVertexAIProvider { .build_request_url(context.provider(), location) .map_err(|e| ProviderError::RequestFailed(e.to_string()))?; - // Initialize retry counter - let mut attempts = 0; + // Initialize separate counters for different error types + let mut rate_limit_attempts = 0; + let mut overloaded_attempts = 0; let mut last_error = None; loop { - // Check if we've exceeded max retries - if attempts > 0 && attempts > self.retry_config.max_retries { - let error_msg = format!( - "Exceeded maximum retry attempts ({}) for rate limiting (429)", - self.retry_config.max_retries - ); - tracing::error!("{}", error_msg); - return Err(last_error.unwrap_or(ProviderError::RateLimitExceeded(error_msg))); - } - // Get a fresh auth token for each attempt let auth_header = self .get_auth_header() @@ -318,60 +352,116 @@ impl GcpVertexAIProvider { let status = response.status(); - // If not a 429, process normally - if status != StatusCode::TOO_MANY_REQUESTS { - let response_json = response.json::().await.map_err(|e| { - ProviderError::RequestFailed(format!("Failed to parse response: {e}")) - })?; - - return match status { - StatusCode::OK => Ok(response_json), - StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => { - tracing::debug!( - "Authentication failed. 
Status: {status}, Payload: {payload:?}" + // Handle 429 Too Many Requests and 529 API Overloaded errors + match status { + status if status == StatusCode::TOO_MANY_REQUESTS => { + rate_limit_attempts += 1; + + if rate_limit_attempts > self.retry_config.max_rate_limit_retries { + let error_msg = format!( + "Exceeded maximum retry attempts ({}) for rate limiting (429) errors", + self.retry_config.max_rate_limit_retries + ); + tracing::error!("{}", error_msg); + return Err( + last_error.unwrap_or(ProviderError::RateLimitExceeded(error_msg)) ); - Err(ProviderError::Authentication(format!( - "Authentication failed: {response_json:?}" - ))) } - _ => { - tracing::debug!( - "Request failed. Status: {status}, Response: {response_json:?}" + + // Try to parse response for more detailed error info + let cite_gcp_vertex_429 = + "See https://cloud.google.com/vertex-ai/generative-ai/docs/error-code-429"; + let response_text = response.text().await.unwrap_or_default(); + + let error_message = + if response_text.contains("Exceeded the Provisioned Throughput") { + // Handle 429 rate limit due to throughput limits + format!("Exceeded the Provisioned Throughput: {cite_gcp_vertex_429}") + } else { + // Handle generic 429 rate limit + format!("Pay-as-you-go resource exhausted: {cite_gcp_vertex_429}") + }; + + tracing::warn!( + "Rate limit exceeded error (429) (attempt {}/{}): {}. 
Retrying after backoff...", + rate_limit_attempts, + self.retry_config.max_rate_limit_retries, + error_message + ); + + // Store the error in case we need to return it after max retries + last_error = Some(ProviderError::RateLimitExceeded(error_message)); + + // Calculate and apply the backoff delay + let delay = self.retry_config.delay_for_attempt(rate_limit_attempts); + tracing::info!("Backing off for {:?} before retry (rate limit 429)", delay); + sleep(delay).await; + } + status if status == *STATUS_API_OVERLOADED => { + overloaded_attempts += 1; + + if overloaded_attempts > self.retry_config.max_overloaded_retries { + let error_msg = format!( + "Exceeded maximum retry attempts ({}) for API overloaded (529) errors", + self.retry_config.max_overloaded_retries + ); + tracing::error!("{}", error_msg); + return Err( + last_error.unwrap_or(ProviderError::RateLimitExceeded(error_msg)) ); - Err(ProviderError::RequestFailed(format!( - "Request failed with status {status}: {response_json:?}" - ))) } - }; - } - // Handle 429 Too Many Requests - attempts += 1; - - // Try to parse response for more detailed error info - let cite_gcp_vertex_429 = - "See https://cloud.google.com/vertex-ai/generative-ai/docs/error-code-429"; - let response_text = response.text().await.unwrap_or_default(); - let quota_error = if response_text.contains("Exceeded the Provisioned Throughput") { - format!("Exceeded the Provisioned Throughput: {cite_gcp_vertex_429}.") - } else { - format!("Pay-as-you-go resource exhausted: {cite_gcp_vertex_429}.") - }; - - tracing::warn!( - "Rate limit exceeded (attempt {}/{}): {}. 
Retrying after backoff...", - attempts, - self.retry_config.max_retries, - quota_error - ); - - // Store the error in case we need to return it after max retries - last_error = Some(ProviderError::RateLimitExceeded(quota_error)); - - // Calculate and apply the backoff delay - let delay = self.retry_config.delay_for_attempt(attempts); - tracing::info!("Backing off for {:?} before retry", delay); - sleep(delay).await; + // Handle 529 Overloaded error (https://docs.anthropic.com/en/api/errors) + let error_message = + "Vertex AI Provider API is temporarily overloaded. This is similar to a rate limit \ + error but indicates backend processing capacity issues." + .to_string(); + + tracing::warn!( + "API overloaded error (529) (attempt {}/{}): {}. Retrying after backoff...", + overloaded_attempts, + self.retry_config.max_overloaded_retries, + error_message + ); + + // Store the error in case we need to return it after max retries + last_error = Some(ProviderError::RateLimitExceeded(error_message)); + + // Calculate and apply the backoff delay + let delay = self.retry_config.delay_for_attempt(overloaded_attempts); + tracing::info!( + "Backing off for {:?} before retry (API overloaded 529)", + delay + ); + sleep(delay).await; + } + // For any other status codes, process normally + _ => { + let response_json = response.json::().await.map_err(|e| { + ProviderError::RequestFailed(format!("Failed to parse response: {e}")) + })?; + + return match status { + StatusCode::OK => Ok(response_json), + StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => { + tracing::debug!( + "Authentication failed. Status: {status}, Payload: {payload:?}" + ); + Err(ProviderError::Authentication(format!( + "Authentication failed: {response_json:?}" + ))) + } + _ => { + tracing::debug!( + "Request failed. 
Status: {status}, Response: {response_json:?}" + ); + Err(ProviderError::RequestFailed(format!( + "Request failed with status {status}: {response_json:?}" + ))) + } + }; + } + } } } @@ -432,7 +522,7 @@ impl Provider for GcpVertexAIProvider { GcpVertexAIModel::Claude(ClaudeVersion::Haiku35), GcpVertexAIModel::Gemini(GeminiVersion::Pro15), GcpVertexAIModel::Gemini(GeminiVersion::Flash20), - GcpVertexAIModel::Gemini(GeminiVersion::Pro20Exp), + GcpVertexAIModel::Gemini(GeminiVersion::Pro25Exp), ] .iter() .map(|model| model.to_string()) @@ -452,6 +542,18 @@ impl Provider for GcpVertexAIProvider { vec![ ConfigKey::new("GCP_PROJECT_ID", true, false, None), ConfigKey::new("GCP_LOCATION", true, false, Some(Iowa.to_string().as_str())), + ConfigKey::new( + "GCP_MAX_RATE_LIMIT_RETRIES", + false, + false, + Some(&DEFAULT_MAX_RETRIES.to_string()), + ), + ConfigKey::new( + "GCP_MAX_OVERLOADED_RETRIES", + false, + false, + Some(&DEFAULT_MAX_RETRIES.to_string()), + ), ConfigKey::new( "GCP_MAX_RETRIES", false, @@ -521,11 +623,13 @@ impl Provider for GcpVertexAIProvider { #[cfg(test)] mod tests { use super::*; + use reqwest::StatusCode; #[test] fn test_retry_config_delay_calculation() { let config = RetryConfig { - max_retries: 5, + max_rate_limit_retries: 5, + max_overloaded_retries: 5, initial_interval_ms: 1000, backoff_multiplier: 2.0, max_interval_ms: 32000, @@ -548,6 +652,44 @@ mod tests { assert!(delay10.as_millis() <= 38400); // max_interval_ms * 1.2 (max jitter) } + #[test] + fn test_max_retries_for_status() { + let config = RetryConfig { + max_rate_limit_retries: 5, + max_overloaded_retries: 10, + initial_interval_ms: 1000, + backoff_multiplier: 2.0, + max_interval_ms: 32000, + }; + + // Check that we get the right max retries for each error type + assert_eq!( + config.max_retries_for_status(StatusCode::TOO_MANY_REQUESTS), + 5 + ); + assert_eq!(config.max_retries_for_status(*STATUS_API_OVERLOADED), 10); + + // For any other status code, we should get the rate limit retries + 
assert_eq!(config.max_retries_for_status(StatusCode::BAD_REQUEST), 5); + } + + #[test] + fn test_status_overloaded_code() { + // Test that we correctly handle the 529 status code + + // Verify the custom status code is created correctly + assert_eq!(STATUS_API_OVERLOADED.as_u16(), 529); + + // This is not a standard HTTP status code, so it's classified as server error + assert!(STATUS_API_OVERLOADED.is_server_error()); + + // Should be different from TOO_MANY_REQUESTS (429) + assert_ne!(*STATUS_API_OVERLOADED, StatusCode::TOO_MANY_REQUESTS); + + // Should be different from SERVICE_UNAVAILABLE (503) + assert_ne!(*STATUS_API_OVERLOADED, StatusCode::SERVICE_UNAVAILABLE); + } + #[test] fn test_model_provider_conversion() { assert_eq!(ModelProvider::Anthropic.as_str(), "anthropic"); @@ -592,7 +734,7 @@ mod tests { .collect(); assert!(model_names.contains(&"claude-3-5-sonnet-v2@20241022".to_string())); assert!(model_names.contains(&"gemini-1.5-pro-002".to_string())); - // Should contain the original 2 config keys plus 4 new retry-related ones - assert_eq!(metadata.config_keys.len(), 6); + // Should contain the original 2 config keys plus 6 new retry-related ones + assert_eq!(metadata.config_keys.len(), 8); } } diff --git a/documentation/docs/getting-started/providers.md b/documentation/docs/getting-started/providers.md index 189357c4a50a..9807ae30fa5b 100644 --- a/documentation/docs/getting-started/providers.md +++ b/documentation/docs/getting-started/providers.md @@ -17,18 +17,18 @@ Goose relies heavily on tool calling capabilities and currently works best with ## Available Providers -| Provider | Description | Parameters | 
-|-----------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [Amazon Bedrock](https://aws.amazon.com/bedrock/) | Offers a variety of foundation models, including Claude, Jurassic-2, and others. **AWS environment variables must be set in advance, not configured through `goose configure`** | `AWS_PROFILE`, or `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION`, ... | -| [Anthropic](https://www.anthropic.com/) | Offers Claude, an advanced AI model for natural language tasks. | `ANTHROPIC_API_KEY`, `ANTHROPIC_HOST` (optional) | -| [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/) | Access Azure-hosted OpenAI models, including GPT-4 and GPT-3.5. Supports both API key and Azure credential chain authentication. | `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_DEPLOYMENT_NAME`, `AZURE_OPENAI_API_KEY` (optional) | -| [Databricks](https://www.databricks.com/) | Unified data analytics and AI platform for building and deploying models. | `DATABRICKS_HOST`, `DATABRICKS_TOKEN` | -| [Gemini](https://ai.google.dev/gemini-api/docs) | Advanced LLMs by Google with multimodal capabilities (text, images). | `GOOGLE_API_KEY` | -| [GCP Vertex AI](https://cloud.google.com/vertex-ai) | Google Cloud's Vertex AI platform, supporting Gemini and Claude models. **Credentials must be configured in advance. 
Follow the instructions at https://cloud.google.com/vertex-ai/docs/authentication.** | `GCP_PROJECT_ID`, `GCP_LOCATION` and optional `GCP_MAX_RETRIES` (6), `GCP_INITIAL_RETRY_INTERVAL_MS` (5000), `GCP_BACKOFF_MULTIPLIER` (2.0), `GCP_MAX_RETRY_INTERVAL_MS` (320_000). | -| [Groq](https://groq.com/) | High-performance inference hardware and tools for LLMs. | `GROQ_API_KEY` | -| [Ollama](https://ollama.com/) | Local model runner supporting Qwen, Llama, DeepSeek, and other open-source models. **Because this provider runs locally, you must first [download and run a model](/docs/getting-started/providers#local-llms-ollama).** | `OLLAMA_HOST` | -| [OpenAI](https://platform.openai.com/api-keys) | Provides gpt-4o, o1, and other advanced language models. Also supports OpenAI-compatible endpoints (e.g., self-hosted LLaMA, vLLM, KServe). **o1-mini and o1-preview are not supported because Goose uses tool calling.** | `OPENAI_API_KEY`, `OPENAI_HOST` (optional), `OPENAI_ORGANIZATION` (optional), `OPENAI_PROJECT` (optional), `OPENAI_CUSTOM_HEADERS` (optional) | -| [OpenRouter](https://openrouter.ai/) | API gateway for unified access to various models with features like rate-limiting management. | `OPENROUTER_API_KEY` | +| Provider | Description | Parameters | +|-----------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [Amazon Bedrock](https://aws.amazon.com/bedrock/) | Offers a variety of foundation models, including Claude, Jurassic-2, and others. 
**AWS environment variables must be set in advance, not configured through `goose configure`** | `AWS_PROFILE`, or `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION`, ... | +| [Anthropic](https://www.anthropic.com/) | Offers Claude, an advanced AI model for natural language tasks. | `ANTHROPIC_API_KEY`, `ANTHROPIC_HOST` (optional) | +| [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/) | Access Azure-hosted OpenAI models, including GPT-4 and GPT-3.5. Supports both API key and Azure credential chain authentication. | `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_DEPLOYMENT_NAME`, `AZURE_OPENAI_API_KEY` (optional) | +| [Databricks](https://www.databricks.com/) | Unified data analytics and AI platform for building and deploying models. | `DATABRICKS_HOST`, `DATABRICKS_TOKEN` | +| [Gemini](https://ai.google.dev/gemini-api/docs) | Advanced LLMs by Google with multimodal capabilities (text, images). | `GOOGLE_API_KEY` | +| [GCP Vertex AI](https://cloud.google.com/vertex-ai) | Google Cloud's Vertex AI platform, supporting Gemini and Claude models. **Credentials must be configured in advance. Follow the instructions at https://cloud.google.com/vertex-ai/docs/authentication.** | `GCP_PROJECT_ID`, `GCP_LOCATION` and optionally `GCP_MAX_RATE_LIMIT_RETRIES` (6), `GCP_MAX_OVERLOADED_RETRIES` (6), `GCP_INITIAL_RETRY_INTERVAL_MS` (5000), `GCP_BACKOFF_MULTIPLIER` (2.0), `GCP_MAX_RETRY_INTERVAL_MS` (320_000). | +| [Groq](https://groq.com/) | High-performance inference hardware and tools for LLMs. | `GROQ_API_KEY` | +| [Ollama](https://ollama.com/) | Local model runner supporting Qwen, Llama, DeepSeek, and other open-source models. **Because this provider runs locally, you must first [download and run a model](/docs/getting-started/providers#local-llms-ollama).** | `OLLAMA_HOST` | +| [OpenAI](https://platform.openai.com/api-keys) | Provides gpt-4o, o1, and other advanced language models. 
Also supports OpenAI-compatible endpoints (e.g., self-hosted LLaMA, vLLM, KServe). **o1-mini and o1-preview are not supported because Goose uses tool calling.** | `OPENAI_API_KEY`, `OPENAI_HOST` (optional), `OPENAI_ORGANIZATION` (optional), `OPENAI_PROJECT` (optional), `OPENAI_CUSTOM_HEADERS` (optional) | +| [OpenRouter](https://openrouter.ai/) | API gateway for unified access to various models with features like rate-limiting management. | `OPENROUTER_API_KEY` | diff --git a/ui/desktop/src/components/settings/models/GooseModels.tsx b/ui/desktop/src/components/settings/models/GooseModels.tsx index 2b73dc4d7f46..544917165f0a 100644 --- a/ui/desktop/src/components/settings/models/GooseModels.tsx +++ b/ui/desktop/src/components/settings/models/GooseModels.tsx @@ -25,7 +25,6 @@ export const gooseModels: Model[] = [ { id: 22, name: 'claude-3-5-sonnet-v2@20241022', provider: 'GCP Vertex AI' }, { id: 23, name: 'claude-3-5-sonnet@20240620', provider: 'GCP Vertex AI' }, { id: 24, name: 'claude-3-5-haiku@20241022', provider: 'GCP Vertex AI' }, - { id: 25, name: 'gemini-2.0-pro-exp-02-05', provider: 'GCP Vertex AI' }, { id: 26, name: 'gemini-2.0-flash-001', provider: 'GCP Vertex AI' }, { id: 27, name: 'gemini-1.5-pro-002', provider: 'GCP Vertex AI' }, { id: 28, name: 'gemini-2.5-pro-exp-03-25', provider: 'GCP Vertex AI' }, diff --git a/ui/desktop/src/components/settings/models/hardcoded_stuff.tsx b/ui/desktop/src/components/settings/models/hardcoded_stuff.tsx index d8168e6e87ca..8eef9b3d99bd 100644 --- a/ui/desktop/src/components/settings/models/hardcoded_stuff.tsx +++ b/ui/desktop/src/components/settings/models/hardcoded_stuff.tsx @@ -13,7 +13,6 @@ export const google_models = [ 'gemini-2.0-flash', 'gemini-2.0-flash-lite-preview-02-05', 'gemini-2.0-flash-thinking-exp-01-21', - 'gemini-2.0-pro-exp-02-05', 'gemini-2.5-pro-exp-03-25', ]; @@ -32,7 +31,6 @@ export const gcp_vertex_ai_models = [ 'claude-3-5-haiku@20241022', 'gemini-1.5-pro-002', 'gemini-2.0-flash-001', - 
'gemini-2.0-pro-exp-02-05', 'gemini-2.5-pro-exp-03-25', ];