Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/goose-server/src/routes/config_management.rs
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ pub async fn get_provider_models(
ProviderError::UsageError(_) => StatusCode::BAD_REQUEST,

// Transient errors - client should retry later
ProviderError::RateLimitExceeded(_) => StatusCode::TOO_MANY_REQUESTS,
ProviderError::RateLimitExceeded { .. } => StatusCode::TOO_MANY_REQUESTS,

// All other errors - internal server error
_ => StatusCode::INTERNAL_SERVER_ERROR,
Expand Down
2 changes: 1 addition & 1 deletion crates/goose/src/agents/subagent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ impl SubAgent {
last_error = Some(anyhow::anyhow!("Context length exceeded"));
break;
}
Err(ProviderError::RateLimitExceeded(_)) => {
Err(ProviderError::RateLimitExceeded { .. }) => {
self.set_status(SubAgentStatus::Completed("Rate limit exceeded".to_string()))
.await;
last_error = Some(anyhow::anyhow!("Rate limit exceeded"));
Expand Down
8 changes: 4 additions & 4 deletions crates/goose/src/providers/bedrock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,10 @@ impl BedrockProvider {
.await
.map_err(|err| match err.into_service_error() {
ConverseError::ThrottlingException(throttle_err) => {
ProviderError::RateLimitExceeded(format!(
"Bedrock throttling error: {:?}",
throttle_err
))
ProviderError::RateLimitExceeded {
details: format!("Bedrock throttling error: {:?}", throttle_err),
retry_delay: None,
}
}
ConverseError::AccessDeniedException(err) => {
ProviderError::Authentication(format!("Failed to call Bedrock: {:?}", err))
Expand Down
8 changes: 6 additions & 2 deletions crates/goose/src/providers/errors.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use reqwest::StatusCode;
use std::time::Duration;
use thiserror::Error;

#[derive(Error, Debug, PartialEq)]
Expand All @@ -9,8 +10,11 @@ pub enum ProviderError {
#[error("Context length exceeded: {0}")]
ContextLengthExceeded(String),

#[error("Rate limit exceeded: {0}")]
RateLimitExceeded(String),
#[error("Rate limit exceeded: {details}")]
RateLimitExceeded {
details: String,
retry_delay: Option<Duration>,
},

#[error("Server error: {0}")]
ServerError(String),
Expand Down
29 changes: 20 additions & 9 deletions crates/goose/src/providers/gcpvertexai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,10 @@ impl GcpVertexAIProvider {
self.retry_config.max_retries
);
tracing::error!("{}", error_msg);
return Err(last_error.unwrap_or(ProviderError::RateLimitExceeded(error_msg)));
return Err(last_error.unwrap_or(ProviderError::RateLimitExceeded {
details: error_msg,
retry_delay: None,
}));
}

// Get a fresh auth token for each attempt
Expand Down Expand Up @@ -292,9 +295,10 @@ impl GcpVertexAIProvider {
self.retry_config.max_retries
);
tracing::error!("{}", error_msg);
return Err(
last_error.unwrap_or(ProviderError::RateLimitExceeded(error_msg))
);
return Err(last_error.unwrap_or(ProviderError::RateLimitExceeded {
details: error_msg,
retry_delay: None,
}));
}

// Try to parse response for more detailed error info
Expand All @@ -319,7 +323,10 @@ impl GcpVertexAIProvider {
);

// Store the error in case we need to return it after max retries
last_error = Some(ProviderError::RateLimitExceeded(error_message));
last_error = Some(ProviderError::RateLimitExceeded {
details: error_message,
retry_delay: None,
});

// Calculate and apply the backoff delay
let delay = self.retry_config.delay_for_attempt(rate_limit_attempts);
Expand All @@ -335,9 +342,10 @@ impl GcpVertexAIProvider {
self.retry_config.max_retries
);
tracing::error!("{}", error_msg);
return Err(
last_error.unwrap_or(ProviderError::RateLimitExceeded(error_msg))
);
return Err(last_error.unwrap_or(ProviderError::RateLimitExceeded {
details: error_msg,
retry_delay: None,
}));
}

// Handle 529 Overloaded error (https://docs.anthropic.com/en/api/errors)
Expand All @@ -354,7 +362,10 @@ impl GcpVertexAIProvider {
);

// Store the error in case we need to return it after max retries
last_error = Some(ProviderError::RateLimitExceeded(error_message));
last_error = Some(ProviderError::RateLimitExceeded {
details: error_message,
retry_delay: None,
});

// Calculate and apply the backoff delay
let delay = self.retry_config.delay_for_attempt(overloaded_attempts);
Expand Down
7 changes: 6 additions & 1 deletion crates/goose/src/providers/openrouter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,12 @@ impl OpenRouterProvider {
// Return appropriate error based on the OpenRouter error code
match error_code {
401 | 403 => return Err(ProviderError::Authentication(error_message.to_string())),
429 => return Err(ProviderError::RateLimitExceeded(error_message.to_string())),
429 => {
return Err(ProviderError::RateLimitExceeded {
details: error_message.to_string(),
retry_delay: None,
})
}
500 | 503 => return Err(ProviderError::ServerError(error_message.to_string())),
_ => return Err(ProviderError::RequestFailed(error_message.to_string())),
}
Expand Down
11 changes: 9 additions & 2 deletions crates/goose/src/providers/retry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ pub trait ProviderRetry {
Err(error) => {
let should_retry = matches!(
error,
ProviderError::RateLimitExceeded(_) | ProviderError::ServerError(_)
ProviderError::RateLimitExceeded { .. } | ProviderError::ServerError(_)
);

if should_retry && attempts < config.max_retries {
Expand All @@ -101,7 +101,14 @@ pub trait ProviderRetry {
error
);

let delay = config.delay_for_attempt(attempts);
let delay = match &error {
ProviderError::RateLimitExceeded {
retry_delay: Some(provider_delay),
..
} => *provider_delay,
_ => config.delay_for_attempt(attempts),
};

tracing::info!("Backing off for {:?} before retry", delay);
sleep(delay).await;
continue;
Expand Down
7 changes: 6 additions & 1 deletion crates/goose/src/providers/tetrate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,12 @@ impl TetrateProvider {
// Return appropriate error based on the error code
match error_code {
401 | 403 => return Err(ProviderError::Authentication(error_message.to_string())),
429 => return Err(ProviderError::RateLimitExceeded(error_message.to_string())),
429 => {
return Err(ProviderError::RateLimitExceeded {
details: error_message.to_string(),
retry_delay: None,
})
}
500 | 503 => return Err(ProviderError::ServerError(error_message.to_string())),
_ => return Err(ProviderError::RequestFailed(error_message.to_string())),
}
Expand Down
64 changes: 60 additions & 4 deletions crates/goose/src/providers/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use serde::{Deserialize, Serialize};
use serde_json::{json, Map, Value};
use std::io::Read;
use std::path::Path;
use std::time::Duration;

use crate::providers::errors::{OpenAIError, ProviderError};

Expand Down Expand Up @@ -105,7 +106,10 @@ pub fn map_http_error_to_provider_error(
))
}
}
StatusCode::TOO_MANY_REQUESTS => ProviderError::RateLimitExceeded(format!("{:?}", payload)),
StatusCode::TOO_MANY_REQUESTS => ProviderError::RateLimitExceeded {
details: format!("{:?}", payload),
retry_delay: None,
},
_ if status.is_server_error() => ProviderError::ServerError(format!("{:?}", payload)),
_ => ProviderError::RequestFailed(format!("Request failed with status: {}", status)),
};
Expand Down Expand Up @@ -212,6 +216,31 @@ fn get_google_final_status(status: StatusCode, payload: Option<&Value>) -> Statu
status
}

fn parse_google_retry_delay(payload: &Value) -> Option<Duration> {
payload
.get("error")
.and_then(|error| error.get("details"))
.and_then(|details| details.as_array())
.and_then(|details_array| {
details_array.iter().find_map(|detail| {
if detail
.get("@type")
.and_then(|t| t.as_str())
.is_some_and(|s| s.ends_with("RetryInfo"))
{
detail
.get("retryDelay")
.and_then(|delay| delay.as_str())
.and_then(|s| s.strip_suffix('s'))
.and_then(|num| num.parse::<u64>().ok())
.map(Duration::from_secs)
} else {
None
}
})
})
}

/// Handle response from Google Gemini API-compatible endpoints.
///
/// Processes HTTP responses, handling specific statuses and parsing the payload
Expand Down Expand Up @@ -253,6 +282,13 @@ pub async fn handle_response_google_compat(response: Response) -> Result<Value,
);
Err(ProviderError::RequestFailed(format!("Request failed with status: {}. Message: {}", final_status, error_msg)))
}
StatusCode::TOO_MANY_REQUESTS => {
let retry_delay = payload.as_ref().and_then(parse_google_retry_delay);
Err(ProviderError::RateLimitExceeded {
details: format!("{:?}", payload),
retry_delay,
})
}
_ if final_status.is_server_error() => {
Err(ProviderError::ServerError(format!("{:?}", payload)))
}
Expand Down Expand Up @@ -804,6 +840,25 @@ mod tests {
"Hello\\u0001World"
);
}

#[test]
fn test_parse_google_retry_delay() {
    // A minimal Google-style error body carrying a single RetryInfo detail.
    let retry_info = json!({
        "@type": "type.googleapis.com/google.rpc.RetryInfo",
        "retryDelay": "42s"
    });
    let body = json!({ "error": { "details": [retry_info] } });

    // The "42s" string should come back as a 42-second Duration.
    let parsed = parse_google_retry_delay(&body);
    assert_eq!(parsed, Some(Duration::from_secs(42)));
}

#[tokio::test]
async fn test_handle_status_openai_compat() {
let test_cases = vec![
Expand Down Expand Up @@ -969,9 +1024,10 @@ mod tests {
(
StatusCode::TOO_MANY_REQUESTS,
Some(json!({"retry_after": 60})),
ProviderError::RateLimitExceeded(
"Some(Object {\"retry_after\": Number(60)})".to_string(),
),
ProviderError::RateLimitExceeded{
details: "Some(Object {\"retry_after\": Number(60)})".to_string(),
retry_delay: None,
},
),
// is_server_error() without payload
(
Expand Down
Loading