Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,6 @@ tera = "1"
terminal_size = "0.4"
thiserror = "2"
tokio = { version = "1", features = ["full"] }
tokio-retry = "0.3"
toml = { version = "1.0", features = ["parse", "preserve_order"] }
toml_edit = { version = "0.25", features = ["parse"] }
ubi = { version = "0.9", default-features = false }
Expand Down
100 changes: 99 additions & 1 deletion crates/vfox/src/http.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,107 @@
use reqwest::{Client, ClientBuilder};
use reqwest::{Client, ClientBuilder, StatusCode};
use std::sync::LazyLock;
use std::time::Duration;

/// Shared HTTP client for the vfox crate, built lazily on first access.
///
/// Requests sent through it carry a `vfox.rs/<crate version>` user agent.
///
/// # Panics
///
/// Panics on first access if the reqwest client cannot be constructed.
pub static CLIENT: LazyLock<Client> = LazyLock::new(|| {
    let user_agent = format!("vfox.rs/{}", env!("CARGO_PKG_VERSION"));
    let builder = ClientBuilder::new().user_agent(user_agent);
    builder.build().expect("Failed to create reqwest client")
});

/// Number of retries used when `MISE_HTTP_RETRIES` is unset or does not parse
/// as an unsigned integer. Mirrors the default of the `http_retries` setting
/// in the main mise crate, so the two values should be kept in sync.
const DEFAULT_HTTP_RETRIES: usize = 3;

/// Backoff schedule in milliseconds, indexed by retry number (0-based) and
/// shared with the main mise crate. Hand-rolled rather than using
/// ExponentialBackoff::from_millis (which is geometric in the base value) so
/// the human-readable cadence is obvious. Retries past the end of the table
/// reuse the last entry, and jitter is applied to every delay by `retry_delay`.
const BACKOFF_SCHEDULE_MS: &[u64] = &[200, 1_000, 4_000, 15_000];

/// Returns the configured number of HTTP retries.
///
/// Reads `MISE_HTTP_RETRIES` so vfox honors the same opt-out as the rest of
/// mise. vfox is a separate crate without access to mise's Settings layer, so
/// the env var is the only shared signal.
///
/// Surrounding whitespace is ignored, so a value such as `" 0"` or `"0\n"`
/// (easy to produce from shell scripts or CI config) still disables retries
/// instead of silently falling back to the default. If the variable is unset,
/// not valid Unicode, or not an unsigned integer, `DEFAULT_HTTP_RETRIES` is
/// returned.
fn http_retries() -> usize {
    std::env::var("MISE_HTTP_RETRIES")
        .ok()
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .unwrap_or(DEFAULT_HTTP_RETRIES)
}

/// Total number of tries for one request: the initial attempt plus every
/// configured retry. Saturates at `usize::MAX` rather than overflowing.
pub(crate) fn http_retry_attempts() -> usize {
    let retries = http_retries();
    retries.checked_add(1).unwrap_or(usize::MAX)
}

/// Reports whether a response status is worth retrying: 408 (Request
/// Timeout), 429 (Too Many Requests) or any 5xx status.
pub(crate) fn should_retry_status(status: StatusCode) -> bool {
    matches!(status.as_u16(), 408 | 429 | 500..=599)
}

pub(crate) fn is_transient(err: &reqwest::Error) -> bool {
if err.is_timeout() || err.is_connect() || err.is_body() {
return true;
}
if let Some(status) = err.status() {
return should_retry_status(status);
}
false
}

/// Backoff for the `attempt`-th retry (0-indexed).
///
/// The nominal delay comes from `BACKOFF_SCHEDULE_MS`; retries beyond the end
/// of the schedule reuse its longest (last) entry. The nominal value is then
/// scaled by a uniform-ish jitter in [50%, 100%] to avoid thundering herd
/// while keeping delays at least half the nominal value.
pub(crate) fn retry_delay(attempt: usize) -> Duration {
    use std::hash::{BuildHasher, Hasher};

    let base_ms = BACKOFF_SCHEDULE_MS
        .get(attempt)
        .or_else(|| BACKOFF_SCHEDULE_MS.last())
        .copied()
        .unwrap_or(0);

    // Jitter source: a std `RandomState` hasher (randomly keyed, with a fresh
    // key for each instance) mixed with the clock. Using `subsec_nanos() % 50`
    // directly is not enough: where the wall clock only has microsecond or
    // 100 ns granularity, the nanosecond field is always a multiple of 50, so
    // the jitter would collapse to a constant 50%. This still avoids pulling
    // in `rand` for a small crate.
    let mut hasher = std::collections::hash_map::RandomState::new().build_hasher();
    if let Ok(since_epoch) = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        hasher.write_u32(since_epoch.subsec_nanos());
    }

    // `% 51` yields 0..=50, so the percentage covers the documented [50, 100].
    let jitter_pct = 50 + hasher.finish() % 51;
    Duration::from_millis(base_ms * jitter_pct / 100)
}

/// Runs `f` until it succeeds, fails permanently, or the attempt budget from
/// `http_retry_attempts` is used up.
///
/// `f` should issue the request AND extract the body, so that a mid-stream
/// failure (`is_body()`) restarts the whole request; use it for
/// download/text/bytes flows. `url` is only used in log output.
///
/// A warning is logged immediately for every transient failure that will be
/// retried, before the backoff sleep, so flakiness is visible right away. A
/// successful rescue or final exhaustion adds no extra warning: the caller
/// surfaces the outcome.
///
/// # Errors
///
/// Returns the error from the most recent call to `f` when it is not
/// transient (per `is_transient`) or when no attempts remain.
pub(crate) async fn retry_async<F, Fut, T>(
    url: &str,
    mut f: F,
) -> std::result::Result<T, reqwest::Error>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = std::result::Result<T, reqwest::Error>>,
{
    let max_attempts = http_retry_attempts().max(1);
    let mut attempt: usize = 0;
    loop {
        let err = match f().await {
            Ok(value) => return Ok(value),
            Err(err) => err,
        };

        // Give up on permanent errors or once the budget is spent.
        let tries_so_far = attempt + 1;
        if tries_so_far >= max_attempts || !is_transient(&err) {
            return Err(err);
        }

        let delay = retry_delay(attempt);
        log::warn!(
            "HTTP {} attempt {} failed (transient): {}; retrying in {:?}",
            url,
            tries_so_far,
            err,
            delay
        );
        tokio::time::sleep(delay).await;
        attempt = tries_so_far;
    }
}
91 changes: 43 additions & 48 deletions crates/vfox/src/lua_mod/http.rs
Original file line number Diff line number Diff line change
@@ -1,49 +1,49 @@
use mlua::{BorrowedStr, ExternalResult, Lua, MultiValue, Result, Table, Value};
use reqwest::header::{AUTHORIZATION, HeaderMap, HeaderName, HeaderValue};
use reqwest::{RequestBuilder, Response, StatusCode};
use std::time::Duration;
use reqwest::{RequestBuilder, Response};
use url::Url;

use crate::http::CLIENT;

const HTTP_RETRY_ATTEMPTS: usize = 3;

fn should_retry_status(status: StatusCode) -> bool {
matches!(
status,
StatusCode::REQUEST_TIMEOUT
| StatusCode::TOO_MANY_REQUESTS
| StatusCode::BAD_GATEWAY
| StatusCode::SERVICE_UNAVAILABLE
| StatusCode::GATEWAY_TIMEOUT
)
}

fn retry_delay(attempt: usize) -> Duration {
Duration::from_millis(200 * (attempt as u64 + 1))
}
use crate::http::{
CLIENT, http_retry_attempts, is_transient, retry_async, retry_delay, should_retry_status,
};

async fn send_with_retry(builder: RequestBuilder) -> std::result::Result<Response, reqwest::Error> {
let url = builder
.try_clone()
.and_then(|b| b.build().ok())
.map(|r| r.url().to_string())
.unwrap_or_default();
let Some(template) = builder.try_clone() else {
return builder.send().await;
};

for attempt in 0..HTTP_RETRY_ATTEMPTS {
let attempts = http_retry_attempts().max(1);
for attempt in 0..attempts {
let response = template
.try_clone()
.expect("cloned request builder should remain cloneable")
.send()
.await;

match response {
Ok(resp) if should_retry_status(resp.status()) && attempt + 1 < HTTP_RETRY_ATTEMPTS => {
tokio::time::sleep(retry_delay(attempt)).await;
let transient_err: Option<String> = match response {
Ok(resp) if should_retry_status(resp.status()) && attempt + 1 < attempts => {
Some(format!("HTTP {}", resp.status()))
}
Ok(resp) => return Ok(resp),
Err(err) if err.is_timeout() && attempt + 1 < HTTP_RETRY_ATTEMPTS => {
tokio::time::sleep(retry_delay(attempt)).await;
}
Err(err) if is_transient(&err) && attempt + 1 < attempts => Some(err.to_string()),
Err(err) => return Err(err),
};

if let Some(msg) = transient_err {
let delay = retry_delay(attempt);
log::warn!(
"HTTP {} attempt {} failed (transient): {}; retrying in {:?}",
url,
attempt + 1,
msg,
delay
);
tokio::time::sleep(delay).await;
}
}

Expand Down Expand Up @@ -186,12 +186,16 @@ async fn download_file(lua: &Lua, input: MultiValue) -> Result<()> {
};
let headers = add_default_headers(lua, &url, headers);
let path: String = input.iter().nth(1).unwrap().to_string()?;
let resp = send_with_retry(CLIENT.get(&url).headers(headers))
.await
.into_lua_err()?;
resp.error_for_status_ref().into_lua_err()?;
// Retry the whole flow (request + body) so a mid-stream drop restarts the
// download instead of failing.
let bytes = retry_async(&url, || async {
let resp = CLIENT.get(&url).headers(headers.clone()).send().await?;
let resp = resp.error_for_status()?;
resp.bytes().await
})
.await
.into_lua_err()?;
let mut file = tokio::fs::File::create(&path).await.into_lua_err()?;
let bytes = resp.bytes().await.into_lua_err()?;
tokio::io::AsyncWriteExt::write_all(&mut file, &bytes)
.await
.into_lua_err()?;
Expand Down Expand Up @@ -292,22 +296,13 @@ async fn try_download_file(lua: &Lua, input: MultiValue) -> Result<MultiValue> {
]));
}
};
let resp = match send_with_retry(CLIENT.get(&url).headers(headers)).await {
Ok(resp) => resp,
Err(e) => {
return Ok(MultiValue::from_vec(vec![
Value::Nil,
Value::String(lua.create_string(e.to_string())?),
]));
}
};
if let Err(e) = resp.error_for_status_ref() {
return Ok(MultiValue::from_vec(vec![
Value::Nil,
Value::String(lua.create_string(e.to_string())?),
]));
}
let bytes = match resp.bytes().await {
let bytes = match retry_async(&url, || async {
let resp = CLIENT.get(&url).headers(headers.clone()).send().await?;
let resp = resp.error_for_status()?;
resp.bytes().await
})
.await
{
Ok(bytes) => bytes,
Err(e) => {
return Ok(MultiValue::from_vec(vec![
Expand Down
12 changes: 8 additions & 4 deletions crates/vfox/src/vfox.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::hooks::mise_path::MisePathContext;
use crate::hooks::parse_legacy_file::ParseLegacyFileResponse;
use crate::hooks::post_install::PostInstallContext;
use crate::hooks::pre_install::{PreInstall, PreInstallAttestation, VerifiedAttestation};
use crate::http::CLIENT;
use crate::http::{CLIENT, retry_async};
use crate::metadata::Metadata;
use crate::plugin::Plugin;
use crate::registry;
Expand Down Expand Up @@ -414,11 +414,15 @@ impl Vfox {
.download_dir
.join(format!("{sdk}-{version}"))
.join(filename);
let resp = CLIENT.get(url.clone()).send().await?;
resp.error_for_status_ref()?;
let url_str = url.to_string();
let bytes = retry_async(&url_str, || async {
let resp = CLIENT.get(url.clone()).send().await?;
let resp = resp.error_for_status()?;
resp.bytes().await
})
.await?;
file::mkdirp(path.parent().unwrap())?;
let mut file = tokio::fs::File::create(&path).await?;
let bytes = resp.bytes().await?;
tokio::io::AsyncWriteExt::write_all(&mut file, &bytes).await?;
file.sync_all().await?;
Ok(path)
Expand Down
4 changes: 2 additions & 2 deletions schema/mise.json
Original file line number Diff line number Diff line change
Expand Up @@ -971,8 +971,8 @@
}
},
"http_retries": {
"default": 0,
"description": "Number of retries for HTTP requests in mise.",
"default": 3,
"description": "Number of retries for transient HTTP failures in mise.",
"type": "number"
},
"http_timeout": {
Expand Down
15 changes: 12 additions & 3 deletions settings.toml
Original file line number Diff line number Diff line change
Expand Up @@ -947,10 +947,19 @@ env = "MISE_HOOK_ENV_CHPWD_ONLY"
type = "Bool"

[http_retries]
default = 0
description = "Number of retries for HTTP requests in mise."
default = 3
description = "Number of retries for transient HTTP failures in mise."
docs = """
Uses an exponential backoff strategy. The duration is calculated by taking the base (10ms) to the n-th power.
Retries are attempted only on transient errors: HTTP 5xx (server errors), 408
(Request Timeout), 429 (Too Many Requests), and network-layer failures
(connection errors, timeouts, mid-stream body drops). Other 4xx responses
(e.g. 404) are treated as permanent and not retried.

Backoff schedule with jitter: ~200ms / ~1s / ~4s / ~15s. Set to 0 to disable
retries entirely.

When a retry rescues a request, a warning is logged with the original error so
flaky infrastructure doesn't silently mask itself.
"""
env = "MISE_HTTP_RETRIES"
type = "Integer"
Expand Down
Loading
Loading