From fd96babc8d1bc06d79c00a92b086d8339dbe757e Mon Sep 17 00:00:00 2001 From: ThomasTSteinbach Date: Sat, 6 Sep 2025 14:55:48 +0200 Subject: [PATCH 1/9] feat(http): add URL replacement feature for HTTP requests - Support string and regex-based URL replacements via url_replacements setting - Enable protocol, hostname, and path modifications for HTTP downloads - Configure replacements in mise.toml or globally in settings --- .gitignore | 3 + build.rs | 17 +- docs/settings.data.ts | 2 + schema/mise.json | 7 + settings.toml | 143 ++++++++++++++++ src/cli/config/set.rs | 1 + src/cli/settings/ls.rs | 1 + src/cli/settings/set.rs | 15 +- src/config/settings.rs | 7 + src/http.rs | 344 +++++++++++++++++++++++++++++++++++++- xtasks/render/settings.ts | 11 ++ 11 files changed, 546 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 1a49d72606..3905e6819c 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,6 @@ testResults.xml comment.md .claude/settings.local.json +.serena +AGENTS.md +opencode.json diff --git a/build.rs b/build.rs index 96d91ff9e9..4183fe1d7e 100644 --- a/build.rs +++ b/build.rs @@ -195,6 +195,7 @@ pub struct Settings {"# "ListString" => "Vec", "ListPath" => "Vec", "SetString" => "BTreeSet", + "IndexMap" => "IndexMap", t => panic!("Unknown type: {t}"), })); if let Some(type_) = type_ { @@ -276,9 +277,15 @@ pub static SETTINGS_META: Lazy> = Lazy::new for (name, props) in &settings { let props = props.as_table().unwrap(); if let Some(type_) = props.get("type").map(|v| v.as_str().unwrap()) { + // We could shadow the 'type_' variable, but its a best practice to avoid shadowing. + // Thus, we introduce 'meta_type' here. 
+ let meta_type = match type_ { + "IndexMap" => "IndexMap", + other => other, + }; lines.push(format!( r#" "{name}" => SettingsMeta {{ - type_: SettingsType::{type_},"#, + type_: SettingsType::{meta_type},"#, )); if let Some(description) = props.get("description") { let description = description.as_str().unwrap().to_string(); @@ -293,9 +300,15 @@ pub static SETTINGS_META: Lazy> = Lazy::new for (key, props) in props.as_table().unwrap() { let props = props.as_table().unwrap(); if let Some(type_) = props.get("type").map(|v| v.as_str().unwrap()) { + // We could shadow the 'type_' variable, but its a best practice to avoid shadowing. + // Thus, we introduce 'meta_type' here. + let meta_type = match type_ { + "IndexMap" => "IndexMap", + other => other, + }; lines.push(format!( r#" "{name}.{key}" => SettingsMeta {{ - type_: SettingsType::{type_},"#, + type_: SettingsType::{meta_type},"#, )); } if let Some(description) = props.get("description") { diff --git a/docs/settings.data.ts b/docs/settings.data.ts index bb4de16ae7..98eeac1698 100644 --- a/docs/settings.data.ts +++ b/docs/settings.data.ts @@ -37,6 +37,8 @@ export default { type = "string"; } else if (type === "ListString" || type === "ListPath") { type = "string[]"; + } else if (type === "IndexMap") { + type = "object"; } // } else if (type === "String" || type === "PathBuf") { // type = 'string'; diff --git a/schema/mise.json b/schema/mise.json index 9960f4ddf8..0cb69d8fef 100644 --- a/schema/mise.json +++ b/schema/mise.json @@ -976,6 +976,13 @@ "description": "List of default shell arguments for unix to be used with inline commands. For example, `sh`, `-c` for sh.", "type": "string" }, + "url_replacements": { + "description": "Map of URL patterns to replacement URLs applied to all requests. 
Example: {\"github.com\": \"github.example.com\", \"regex:^https://example\\\\.com/(.*)$\": \"https://mirror.com/$1\"}", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, "use_file_shell_for_executable_tasks": { "default": false, "description": "Determines whether to use a specified shell for executing tasks in the tasks directory. When set to true, the shell defined in the file will be used, or the default shell specified by `windows_default_file_shell_args` or `unix_default_file_shell_args` will be applied. If set to false, tasks will be executed directly as programs.", diff --git a/settings.toml b/settings.toml index 0bb66eef2f..3446fb30fe 100644 --- a/settings.toml +++ b/settings.toml @@ -1181,6 +1181,149 @@ type = "String" default = "sh -c -o errexit" description = "List of default shell arguments for unix to be used with inline commands. For example, `sh`, `-c` for sh." +[url_replacements] +env = "MISE_URL_REPLACEMENTS" +type = "IndexMap" +optional = true +parse_env = "parse_url_replacements" +description = "Map of URL patterns to replacement URLs applied to all requests." +docs = ''' +Map of URL patterns to replacement URLs. This feature supports both simple hostname replacements +and advanced regex-based URL transformations for download mirroring and custom registries. 
+ +### Configuration Examples + +Environment variable (JSON format): +```bash +# Simple hostname replacement +export MISE_URL_REPLACEMENTS=' +{ + "github.com": "myregistry.net", + "releases.hashicorp.com": "mirror.example.com" +}' + +# Regex pattern (note the escaped backslashes in JSON) +export MISE_URL_REPLACEMENTS=' +{ + "regex:https://github\.com/([^/]+)/([^/]+)/releases/download/(.+)": + "https://mirror.corp.com/github/$1/$2/$3" +}' +``` + +In mise.toml: +```toml +[settings] +# Simple hostname replacement +url_replacements = { + "github.com" = "myregistry.net", + "releases.hashicorp.com" = "mirror.example.com" +} + +# Regex patterns +url_replacements = { + "regex:^http://(.+)" = "https://$1", + "regex:https://github\\.com/([^/]+)/([^/]+)/releases/download/(.+)" = + "https://mirror.corp.com/github/$1/$2/$3" +} +``` + +### Simple Hostname Replacement + +For simple hostname-based mirroring, the key is the original hostname/domain to replace, +and the value is the replacement string. The replacement happens by searching and replacing +the pattern anywhere in the full URL string (including protocol, hostname, path, and query parameters). + +Examples: +- `github.com` -> `myregistry.net` replaces GitHub hostnames +- `https://github.com` -> `https://myregistry.net` with protocol excludes e.g. 'api.github.com' +- `https://github.com` -> `https://proxy.corp.com/github-mirror` replaces GitHub with corporate proxy +- `http://host.net` -> `https://host.net` replaces protocol from HTTP to HTTPS + +### Advanced Regex Replacement + +For more complex URL transformations, you can use regex patterns. When a key starts with `regex:`, +it is treated as a regular expression pattern that can match and transform any part of the URL. +The value can use capture groups from the regex pattern. + +#### Regex Examples + +**1. 
Protocol Conversion (HTTP to HTTPS)** +```toml +[settings] +url_replacements = { + "regex:^http://(.+)" = "https://$1" +} +``` +This converts any HTTP URL to HTTPS by capturing everything after "http://" and replacing it with "https://". + +**2. GitHub Release Mirroring with Path Restructuring** +```toml +[settings] +url_replacements = { + "regex:https://github\\.com/([^/]+)/([^/]+)/releases/download/(.+)" = + "https://mirror.corp.com/github/$1/$2/$3" +} +``` +Transforms `https://github.com/owner/repo/releases/download/v1.0.0/file.tar.gz` +to `https://mirror.corp.com/github/owner/repo/v1.0.0/file.tar.gz` + +**3. Subdomain to Path Conversion** +```toml +[settings] +url_replacements = { + "regex:https://([^.]+)\\.cdn\\.example\\.com/(.+)" = + "https://unified-cdn.com/$1/$2" +} +``` +Converts subdomain-based URLs to path-based URLs on a unified CDN. + +**4. Multiple Replacement Patterns (processed in order)** +```toml +[settings] +url_replacements = { + "regex:https://github\\.com/microsoft/(.+)" = + "https://internal-mirror.com/microsoft/$1", + "regex:https://github\\.com/(.+)" = + "https://public-mirror.com/github/$1", + "releases.hashicorp.com" = "hashicorp-mirror.internal.com" +} +``` +First regex catches Microsoft repositories specifically, second catches all other GitHub URLs, +and the simple replacement handles HashiCorp. + +### Use Cases + +1. **Corporate Mirrors**: Replace public download URLs with internal corporate mirrors +2. **Custom Registries**: Redirect package downloads to custom or private registries +3. **Geographic Optimization**: Route downloads to geographically closer mirrors +4. **Protocol Changes**: Convert HTTP URLs to HTTPS or vice versa + +### Regex Syntax + +mise uses Rust regex engine which supports: +- `^` and `$` for anchors (start/end of string) +- `(.+)` for capture groups (use `$1`, `$2`, etc. 
in replacement) +- `[^/]+` for character classes (matches any character except `/`) +- `\\.` for escaping special characters (note: double backslash required in TOML) +- `*`, `+`, `?` for quantifiers +- `|` for alternation + +You can check on regex101.com if your regex works (see [example](https://regex101.com/r/rmcIE1/1)). +Full regex syntax documentation: + +### Precedence and Matching + +- Regex patterns (keys starting with `regex:`) are processed first, in the order they appear +- Simple hostname replacements are processed second +- The first matching pattern is used; subsequent patterns are ignored for that URL +- If no patterns match, the original URL is used unchanged + +### Security Considerations + +When using regex patterns, ensure your replacement URLs point to trusted sources, +as this feature can redirect tool downloads to arbitrary locations. +''' + [use_file_shell_for_executable_tasks] env = "MISE_USE_FILE_SHELL_FOR_EXECUTABLE_TASKS" type = "Bool" diff --git a/src/cli/config/set.rs b/src/cli/config/set.rs index 9d44e5c486..8e1bf98939 100644 --- a/src/cli/config/set.rs +++ b/src/cli/config/set.rs @@ -95,6 +95,7 @@ impl ConfigSet { SettingsType::ListString => TomlValueTypes::List, SettingsType::ListPath => TomlValueTypes::List, SettingsType::SetString => TomlValueTypes::Set, + SettingsType::IndexMap => TomlValueTypes::String, }, None => match self.value.as_str() { "true" | "false" => TomlValueTypes::Bool, diff --git a/src/cli/settings/ls.rs b/src/cli/settings/ls.rs index d77e94edfc..262ae8352a 100644 --- a/src/cli/settings/ls.rs +++ b/src/cli/settings/ls.rs @@ -55,6 +55,7 @@ fn settings_type_to_string(st: &SettingsType) -> String { SettingsType::ListString => "array".to_string(), SettingsType::ListPath => "array".to_string(), SettingsType::SetString => "array".to_string(), + SettingsType::IndexMap => "object".to_string(), } } diff --git a/src/cli/settings/set.rs b/src/cli/settings/set.rs index e9548d317d..dd59852b4a 100644 --- a/src/cli/settings/set.rs 
+++ b/src/cli/settings/set.rs @@ -1,7 +1,7 @@ use eyre::{Result, bail, eyre}; use toml_edit::DocumentMut; -use crate::config::settings::{SETTINGS_META, SettingsFile, SettingsType}; +use crate::config::settings::{SETTINGS_META, SettingsFile, SettingsType, parse_url_replacements}; use crate::toml::dedup_toml_array; use crate::{config, duration, file}; @@ -43,6 +43,7 @@ pub fn set(mut key: &str, value: &str, add: bool, local: bool) -> Result<()> { SettingsType::ListString => parse_list_by_comma(value)?, SettingsType::ListPath => parse_list_by_colon(value)?, SettingsType::SetString => parse_set_by_comma(value)?, + SettingsType::IndexMap => parse_indexmap_by_json(value)?, }; let path = if local { @@ -133,6 +134,18 @@ fn parse_duration(value: &str) -> Result { Ok(value.into()) } +fn parse_indexmap_by_json(value: &str) -> Result { + let index_map = parse_url_replacements(value) + .map_err(|e| eyre!("Failed to parse JSON for IndexMap: {}", e))?; + Ok(toml_edit::Value::InlineTable({ + let mut table = toml_edit::InlineTable::new(); + for (k, v) in index_map { + table.insert(&k, toml_edit::Value::String(toml_edit::Formatted::new(v))); + } + table + })) +} + static AFTER_LONG_HELP: &str = color_print::cstr!( r#"Examples: diff --git a/src/config/settings.rs b/src/config/settings.rs index fedf3da65c..18cba72501 100644 --- a/src/config/settings.rs +++ b/src/config/settings.rs @@ -39,6 +39,7 @@ pub enum SettingsType { ListString, ListPath, SetString, + IndexMap, } pub struct SettingsMeta { @@ -532,3 +533,9 @@ where .collect::, _>>() .map(|set| set.into_iter().collect()) } + +/// Parse URL replacements from JSON string format +/// Expected format: {"source_domain": "replacement_domain", ...} +pub fn parse_url_replacements(input: &str) -> Result, serde_json::Error> { + serde_json::from_str(input) +} diff --git a/src/http.rs b/src/http.rs index 8bc1262d05..b1ba826660 100644 --- a/src/http.rs +++ b/src/http.rs @@ -3,6 +3,7 @@ use std::path::Path; use std::time::Duration; use 
eyre::{Report, Result, bail, ensure}; +use regex; use reqwest::header::{HeaderMap, HeaderValue}; use reqwest::{ClientBuilder, IntoUrl, Method, Response}; use std::sync::LazyLock as Lazy; @@ -224,7 +225,7 @@ impl Client { async fn send_with_https_fallback( &self, method: Method, - mut url: Url, + url: Url, headers: &HeaderMap, verb_label: &str, ) -> Result { @@ -234,6 +235,7 @@ impl Client { { Ok(resp) => Ok(resp), Err(_) if url.scheme() == "http" => { + let mut url = url; url.set_scheme("https").unwrap(); self.send_once(method, url, headers, verb_label).await } @@ -244,10 +246,11 @@ impl Client { async fn send_once( &self, method: Method, - url: Url, + mut url: Url, headers: &HeaderMap, verb_label: &str, ) -> Result { + apply_url_replacements(&mut url); debug!("{} {}", verb_label, &url); let mut req = self.reqwest.request(method, url.clone()); req = req.headers(headers.clone()); @@ -322,6 +325,60 @@ fn github_headers(url: &Url) -> HeaderMap { headers } +/// Apply URL replacements based on settings configuration +/// Supports both simple string replacement and regex patterns (prefixed with "regex:") +pub fn apply_url_replacements(url: &mut Url) { + let settings = Settings::get(); + if let Some(replacements) = &settings.url_replacements { + let url_string = url.to_string(); + + for (pattern, replacement) in replacements { + if let Some(pattern_without_prefix) = pattern.strip_prefix("regex:") { + // Regex replacement + if let Ok(regex) = regex::Regex::new(pattern_without_prefix) { + let new_url_string = regex.replace(&url_string, replacement.as_str()); + // Only proceed if the URL actually changed + if new_url_string != url_string { + if let Ok(new_url) = new_url_string.parse() { + *url = new_url; + debug!( + "Replaced URL using regex '{}': {} -> {}", + pattern_without_prefix, + url_string, + url.as_str() + ); + return; // Apply only the first matching replacement + } + } + } else { + warn!( + "Invalid regex pattern in URL replacement: {}", + pattern_without_prefix + 
); + } + } else { + // Simple string replacement + if url_string.contains(pattern) { + let new_url_string = url_string.replace(pattern, replacement); + // Only proceed if the URL actually changed + if new_url_string != url_string { + if let Ok(new_url) = new_url_string.parse() { + *url = new_url; + debug!( + "Replaced URL using string replacement '{}': {} -> {}", + pattern, + url_string, + url.as_str() + ); + return; // Apply only the first matching replacement + } + } + } + } + } + } +} + fn display_github_rate_limit(resp: &Response) { let status = resp.status().as_u16(); if status == 403 || status == 429 { @@ -358,3 +415,286 @@ fn display_github_rate_limit(resp: &Response) { } } } + +#[cfg(test)] +mod tests { + use super::*; + use confique::Partial; + use indexmap::IndexMap; + use url::Url; + + // Mutex to ensure tests don't interfere with each other when modifying global settings + static TEST_SETTINGS_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + + // Helper to create test settings with specific URL replacements + fn with_test_settings(replacements: IndexMap, test_fn: F) -> R + where + F: FnOnce() -> R, + { + // Lock to prevent parallel tests from interfering with global settings + let _guard = TEST_SETTINGS_LOCK.lock().unwrap(); + + // Create settings with custom URL replacements + let mut settings = crate::config::settings::SettingsPartial::empty(); + settings.url_replacements = Some(replacements); + + // Set settings for this test + crate::config::Settings::reset(Some(settings)); + + // Run test + let result = test_fn(); + + // Clean up after test + crate::config::Settings::reset(None); + + result + } + + #[test] + fn test_simple_string_replacement() { + let mut replacements = IndexMap::new(); + replacements.insert("github.com".to_string(), "my-proxy.com".to_string()); + + with_test_settings(replacements, || { + let mut url = Url::parse("https://github.com/owner/repo").unwrap(); + apply_url_replacements(&mut url); + assert_eq!(url.as_str(), 
"https://my-proxy.com/owner/repo"); + }); + } + + #[test] + fn test_full_url_string_replacement() { + let mut replacements = IndexMap::new(); + replacements.insert( + "https://github.com".to_string(), + "https://my-proxy.com/artifactory/github-remote".to_string(), + ); + + with_test_settings(replacements, || { + let mut url = Url::parse("https://github.com/owner/repo").unwrap(); + apply_url_replacements(&mut url); + assert_eq!( + url.as_str(), + "https://my-proxy.com/artifactory/github-remote/owner/repo" + ); + }); + } + + #[test] + fn test_protocol_specific_replacement() { + let mut replacements = IndexMap::new(); + replacements.insert( + "https://github.com".to_string(), + "https://secure-proxy.com".to_string(), + ); + + with_test_settings(replacements.clone(), || { + // HTTPS gets replaced + let mut url1 = Url::parse("https://github.com/owner/repo").unwrap(); + apply_url_replacements(&mut url1); + assert_eq!(url1.as_str(), "https://secure-proxy.com/owner/repo"); + }); + + with_test_settings(replacements, || { + // HTTP does not get replaced (no match) + let mut url2 = Url::parse("http://github.com/owner/repo").unwrap(); + apply_url_replacements(&mut url2); + assert_eq!(url2.as_str(), "http://github.com/owner/repo"); + }); + } + + #[test] + fn test_regex_replacement() { + let mut replacements = IndexMap::new(); + replacements.insert( + r"regex:https://github\.com".to_string(), + "https://my-proxy.com".to_string(), + ); + + with_test_settings(replacements, || { + let mut url = Url::parse("https://github.com/owner/repo").unwrap(); + apply_url_replacements(&mut url); + assert_eq!(url.as_str(), "https://my-proxy.com/owner/repo"); + }); + } + + #[test] + fn test_regex_with_capture_groups() { + let mut replacements = IndexMap::new(); + replacements.insert( + r"regex:https://github\.com/([^/]+)/([^/]+)".to_string(), + "https://my-proxy.com/mirror/$1/$2".to_string(), + ); + + with_test_settings(replacements, || { + let mut url = 
Url::parse("https://github.com/owner/repo/releases").unwrap(); + apply_url_replacements(&mut url); + assert_eq!( + url.as_str(), + "https://my-proxy.com/mirror/owner/repo/releases" + ); + }); + } + + #[test] + fn test_regex_invalid_replacement_url() { + let mut replacements = IndexMap::new(); + replacements.insert( + r"regex:https://github\.com/([^/]+)".to_string(), + "not-a-valid-url".to_string(), + ); + + with_test_settings(replacements, || { + // Invalid result URL should be ignored, original URL unchanged + let mut url = Url::parse("https://github.com/owner/repo").unwrap(); + let original = url.clone(); + apply_url_replacements(&mut url); + assert_eq!(url.as_str(), original.as_str()); + }); + } + + #[test] + fn test_multiple_replacements_first_match_wins() { + let mut replacements = IndexMap::new(); + replacements.insert("github.com".to_string(), "first-proxy.com".to_string()); + replacements.insert("github".to_string(), "second-proxy.com".to_string()); + + with_test_settings(replacements, || { + let mut url = Url::parse("https://github.com/owner/repo").unwrap(); + apply_url_replacements(&mut url); + // First replacement should win + assert_eq!(url.as_str(), "https://first-proxy.com/owner/repo"); + }); + } + + #[test] + fn test_no_replacements_configured() { + let replacements = IndexMap::new(); // Empty + + with_test_settings(replacements, || { + let mut url = Url::parse("https://github.com/owner/repo").unwrap(); + let original = url.clone(); + apply_url_replacements(&mut url); + assert_eq!(url.as_str(), original.as_str()); + }); + } + + #[test] + fn test_regex_complex_patterns() { + let mut replacements = IndexMap::new(); + // Convert GitHub releases to JFrog Artifactory + replacements.insert( + r"regex:https://github\.com/([^/]+)/([^/]+)/releases/download/([^/]+)/(.+)".to_string(), + "https://artifactory.company.com/artifactory/github-releases/$1/$2/$3/$4".to_string(), + ); + + with_test_settings(replacements, || { + let mut url = + 
Url::parse("https://github.com/owner/repo/releases/download/v1.0.0/file.tar.gz") + .unwrap(); + apply_url_replacements(&mut url); + assert_eq!( + url.as_str(), + "https://artifactory.company.com/artifactory/github-releases/owner/repo/v1.0.0/file.tar.gz" + ); + }); + } + + #[test] + fn test_no_settings_configured() { + // Test the real apply_url_replacements function with no settings override + let _guard = TEST_SETTINGS_LOCK.lock().unwrap(); + crate::config::Settings::reset(None); + + let mut url = Url::parse("https://github.com/owner/repo").unwrap(); + let original = url.clone(); + + // This should not crash and should leave URL unchanged + apply_url_replacements(&mut url); + assert_eq!(url.as_str(), original.as_str()); + } + + #[test] + fn test_replacement_affects_full_url_not_just_hostname() { + // Test that replacement works on the full URL string, not just hostname + let mut replacements = IndexMap::new(); + replacements.insert( + "github.com/owner".to_string(), + "proxy.com/mirror".to_string(), + ); + + with_test_settings(replacements, || { + let mut url = Url::parse("https://github.com/owner/repo").unwrap(); + apply_url_replacements(&mut url); + // This demonstrates that replacement happens on full URL, not just hostname + assert_eq!(url.as_str(), "https://proxy.com/mirror/repo"); + }); + } + + #[test] + fn test_path_replacement_example() { + // Test replacing part of the path, proving it's not hostname-only + let mut replacements = IndexMap::new(); + replacements.insert("/releases/download/".to_string(), "/artifacts/".to_string()); + + with_test_settings(replacements, || { + let mut url = + Url::parse("https://github.com/owner/repo/releases/download/v1.0.0/file.tar.gz") + .unwrap(); + apply_url_replacements(&mut url); + // Path component was replaced, proving it's full URL replacement + assert_eq!( + url.as_str(), + "https://github.com/owner/repo/artifacts/v1.0.0/file.tar.gz" + ); + }); + } + + #[test] + fn test_documentation_examples() { + // Test the 
examples from the documentation to ensure they work correctly + + // Example 1: Simple hostname replacement + let mut replacements = IndexMap::new(); + replacements.insert("github.com".to_string(), "myregistry.net".to_string()); + + with_test_settings(replacements, || { + let mut url = Url::parse("https://github.com/user/repo").unwrap(); + apply_url_replacements(&mut url); + assert_eq!(url.as_str(), "https://myregistry.net/user/repo"); + }); + + // Example 2: Protocol + hostname replacement + let mut replacements2 = IndexMap::new(); + replacements2.insert( + "https://github.com".to_string(), + "https://proxy.corp.com/github-mirror".to_string(), + ); + + with_test_settings(replacements2, || { + let mut url = Url::parse("https://github.com/user/repo").unwrap(); + apply_url_replacements(&mut url); + assert_eq!( + url.as_str(), + "https://proxy.corp.com/github-mirror/user/repo" + ); + }); + + // Example 3: Domain + path replacement + let mut replacements3 = IndexMap::new(); + replacements3.insert( + "github.com/releases/download/".to_string(), + "cdn.example.com/artifacts/".to_string(), + ); + + with_test_settings(replacements3, || { + let mut url = + Url::parse("https://github.com/releases/download/v1.0.0/file.tar.gz").unwrap(); + apply_url_replacements(&mut url); + assert_eq!( + url.as_str(), + "https://cdn.example.com/artifacts/v1.0.0/file.tar.gz" + ); + }); + } +} diff --git a/xtasks/render/settings.ts b/xtasks/render/settings.ts index b2e68f97c8..fe980836f0 100644 --- a/xtasks/render/settings.ts +++ b/xtasks/render/settings.ts @@ -12,6 +12,9 @@ type Element = { items?: { type: string; }; + additionalProperties?: { + type: string; + }; }; type Props = { @@ -44,6 +47,7 @@ function buildElement(key: string, props: Props): Element { .with("ListString", () => "string[]") .with("ListPath", () => "string[]") .with("SetString", () => "string[]") + .with("IndexMap", () => "object") .otherwise(() => { throw new Error(`Unknown type: ${type}`); }); @@ -73,6 +77,13 @@ function 
buildElement(key: string, props: Props): Element { }; } + if (type === "object") { + ele.type = "object"; + ele.additionalProperties = { + type: "string", + }; + } + return ele; } From 4c3a5496159177df196f5d97a274f3d7ccf4881f Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Sat, 6 Sep 2025 13:31:47 +0000 Subject: [PATCH 2/9] [autofix.ci] apply automated fixes --- schema/mise.json | 2 +- src/http.rs | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/schema/mise.json b/schema/mise.json index 0cb69d8fef..f568e419a0 100644 --- a/schema/mise.json +++ b/schema/mise.json @@ -977,7 +977,7 @@ "type": "string" }, "url_replacements": { - "description": "Map of URL patterns to replacement URLs applied to all requests. Example: {\"github.com\": \"github.example.com\", \"regex:^https://example\\\\.com/(.*)$\": \"https://mirror.com/$1\"}", + "description": "Map of URL patterns to replacement URLs applied to all requests.", "type": "object", "additionalProperties": { "type": "string" diff --git a/src/http.rs b/src/http.rs index b1ba826660..d59026c957 100644 --- a/src/http.rs +++ b/src/http.rs @@ -3,7 +3,6 @@ use std::path::Path; use std::time::Duration; use eyre::{Report, Result, bail, ensure}; -use regex; use reqwest::header::{HeaderMap, HeaderValue}; use reqwest::{ClientBuilder, IntoUrl, Method, Response}; use std::sync::LazyLock as Lazy; From 9af9a0393906224c079cc599e98b6af0e57e8de6 Mon Sep 17 00:00:00 2001 From: ThomasTSteinbach Date: Sat, 6 Sep 2025 15:46:15 +0200 Subject: [PATCH 3/9] chore(http): optimized imports --- src/http.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/http.rs b/src/http.rs index d59026c957..9d8ed288f2 100644 --- a/src/http.rs +++ b/src/http.rs @@ -3,6 +3,7 @@ use std::path::Path; use std::time::Duration; use eyre::{Report, Result, bail, ensure}; +use regex::Regex; use reqwest::header::{HeaderMap, HeaderValue}; use reqwest::{ClientBuilder, IntoUrl, 
Method, Response}; use std::sync::LazyLock as Lazy; @@ -334,7 +335,7 @@ pub fn apply_url_replacements(url: &mut Url) { for (pattern, replacement) in replacements { if let Some(pattern_without_prefix) = pattern.strip_prefix("regex:") { // Regex replacement - if let Ok(regex) = regex::Regex::new(pattern_without_prefix) { + if let Ok(regex) = Regex::new(pattern_without_prefix) { let new_url_string = regex.replace(&url_string, replacement.as_str()); // Only proceed if the URL actually changed if new_url_string != url_string { From 5665a29feced979d4acd7dbcf905613f972b4987 Mon Sep 17 00:00:00 2001 From: ThomasTSteinbach Date: Sat, 6 Sep 2025 15:59:10 +0200 Subject: [PATCH 4/9] chore: reverted changes to .gitignore --- .gitignore | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitignore b/.gitignore index 3905e6819c..1a49d72606 100644 --- a/.gitignore +++ b/.gitignore @@ -34,6 +34,3 @@ testResults.xml comment.md .claude/settings.local.json -.serena -AGENTS.md -opencode.json From 11d079c25af9dc1550e29baa55991e9ecf470bf2 Mon Sep 17 00:00:00 2001 From: ThomasTSteinbach Date: Sat, 6 Sep 2025 16:20:08 +0200 Subject: [PATCH 5/9] feat(http): raised log level for url replacments to 'trace' --- src/http.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/http.rs b/src/http.rs index 9d8ed288f2..31ca8b67e0 100644 --- a/src/http.rs +++ b/src/http.rs @@ -341,7 +341,7 @@ pub fn apply_url_replacements(url: &mut Url) { if new_url_string != url_string { if let Ok(new_url) = new_url_string.parse() { *url = new_url; - debug!( + trace!( "Replaced URL using regex '{}': {} -> {}", pattern_without_prefix, url_string, @@ -364,7 +364,7 @@ pub fn apply_url_replacements(url: &mut Url) { if new_url_string != url_string { if let Ok(new_url) = new_url_string.parse() { *url = new_url; - debug!( + trace!( "Replaced URL using string replacement '{}': {} -> {}", pattern, url_string, From c1bb1d085494b94177bdb9d2e7a0d6b6e9d12320 Mon Sep 17 00:00:00 2001 From: ThomasTSteinbach 
Date: Sat, 6 Sep 2025 16:27:16 +0200 Subject: [PATCH 6/9] docs: fixed url replacement precedence --- settings.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/settings.toml b/settings.toml index 3446fb30fe..460f649002 100644 --- a/settings.toml +++ b/settings.toml @@ -1313,8 +1313,8 @@ Full regex syntax documentation: ### Precedence and Matching -- Regex patterns (keys starting with `regex:`) are processed first, in the order they appear -- Simple hostname replacements are processed second +- URL replacements are processed in the order they appear in the configuration (IndexMap insertion order) +- Both regex patterns (keys starting with `regex:`) and simple string replacements are processed in this same order - The first matching pattern is used; subsequent patterns are ignored for that URL - If no patterns match, the original URL is used unchanged From af2c1c7a6dbf01702110131acb2e74a87a1cb313 Mon Sep 17 00:00:00 2001 From: ThomasTSteinbach Date: Sat, 6 Sep 2025 16:54:36 +0200 Subject: [PATCH 7/9] docs: use real world product names --- settings.toml | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/settings.toml b/settings.toml index 460f649002..e0035a0edb 100644 --- a/settings.toml +++ b/settings.toml @@ -1198,15 +1198,16 @@ Environment variable (JSON format): # Simple hostname replacement export MISE_URL_REPLACEMENTS=' { - "github.com": "myregistry.net", - "releases.hashicorp.com": "mirror.example.com" + "github.com": "nexus.mycompany.net", + "releases.hashicorp.com": "artifactory.xmpl.com" }' # Regex pattern (note the escaped backslashes in JSON) export MISE_URL_REPLACEMENTS=' { + "regex:^http://(.+)" = "https://$1", "regex:https://github\.com/([^/]+)/([^/]+)/releases/download/(.+)": - "https://mirror.corp.com/github/$1/$2/$3" + "https://hub.corp.com/artifactory/github/$1/$2/$3" }' ``` @@ -1215,15 +1216,15 @@ In mise.toml: [settings] # Simple hostname replacement url_replacements = { - 
"github.com" = "myregistry.net", - "releases.hashicorp.com" = "mirror.example.com" + "github.com" = "nexus.mycompany.net", + "releases.hashicorp.com" = "artifactory.xmpl.com" } # Regex patterns -url_replacements = { +url_replacements = { "regex:^http://(.+)" = "https://$1", "regex:https://github\\.com/([^/]+)/([^/]+)/releases/download/(.+)" = - "https://mirror.corp.com/github/$1/$2/$3" + "https://hub.corp.com/artifactory/github/$1/$2/$3" } ``` @@ -1234,8 +1235,8 @@ and the value is the replacement string. The replacement happens by searching an the pattern anywhere in the full URL string (including protocol, hostname, path, and query parameters). Examples: -- `github.com` -> `myregistry.net` replaces GitHub hostnames -- `https://github.com` -> `https://myregistry.net` with protocol excludes e.g. 'api.github.com' +- `github.com` -> `nexus.mycompany.net` replaces GitHub hostnames +- `https://github.com` -> `https://nexus.mycompany.net` with protocol excludes e.g. 'api.github.com' - `https://github.com` -> `https://proxy.corp.com/github-mirror` replaces GitHub with corporate proxy - `http://host.net` -> `https://host.net` replaces protocol from HTTP to HTTPS @@ -1261,11 +1262,11 @@ This converts any HTTP URL to HTTPS by capturing everything after "http://" and [settings] url_replacements = { "regex:https://github\\.com/([^/]+)/([^/]+)/releases/download/(.+)" = - "https://mirror.corp.com/github/$1/$2/$3" + "https://hub.corp.com/artifactory/github/$1/$2/$3" } ``` Transforms `https://github.com/owner/repo/releases/download/v1.0.0/file.tar.gz` -to `https://mirror.corp.com/github/owner/repo/v1.0.0/file.tar.gz` +to `https://hub.corp.com/artifactory/github/owner/repo/v1.0.0/file.tar.gz` **3. 
Subdomain to Path Conversion** ```toml From 4063436781759507fadf6e27ce6e4760dc2ce5cd Mon Sep 17 00:00:00 2001 From: ThomasTSteinbach Date: Sat, 6 Sep 2025 17:17:24 +0200 Subject: [PATCH 8/9] docs: move bulk of url replacement doc onto a separate page --- docs/.vitepress/config.ts | 1 + docs/url-replacements.md | 142 ++++++++++++++++++++++++++++++++++++++ settings.toml | 133 +---------------------------------- 3 files changed, 144 insertions(+), 132 deletions(-) create mode 100644 docs/url-replacements.md diff --git a/docs/.vitepress/config.ts b/docs/.vitepress/config.ts index 95d7530425..a9485fadad 100644 --- a/docs/.vitepress/config.ts +++ b/docs/.vitepress/config.ts @@ -181,6 +181,7 @@ export default withMermaid( { text: "Architecture", link: "/architecture" }, { text: "Paranoid", link: "/paranoid" }, { text: "Templates", link: "/templates" }, + { text: "URL Replacements", link: "/url-replacements" }, { text: "Model Context Protocol", link: "/mcp" }, { text: "How I Use mise", link: "/how-i-use-mise" }, { text: "Directory Structure", link: "/directories" }, diff --git a/docs/url-replacements.md b/docs/url-replacements.md new file mode 100644 index 0000000000..05eb2e52d7 --- /dev/null +++ b/docs/url-replacements.md @@ -0,0 +1,142 @@ +# URL Replacements + +mise does not include a built-in registry for downloading artifacts. +Instead, it retrieves remote registry manifests, which specify the URLs for downloading tools. + +In some environments — such as enterprises or DMZs — these URLs may not be directly accessible and must be accessed through a proxy or internal mirror. + +URL replacements allow you to modify or redirect any URL that mise attempts to access, making it possible to use internal proxies, mirrors, or alternative sources as needed. 
+ + +## Configuration Examples + +Environment variable (JSON format): +```bash +# Simple hostname replacement +export MISE_URL_REPLACEMENTS=' +{ + "github.com": "nexus.mycompany.net", + "releases.hashicorp.com": "artifactory.xmpl.com" +}' + +# Regex pattern (note the escaped backslashes in JSON) +export MISE_URL_REPLACEMENTS=' +{ + "regex:^http://(.+)": "https://$1", + "regex:https://github\\.com/([^/]+)/([^/]+)/releases/download/(.+)": + "https://hub.corp.com/artifactory/github/$1/$2/$3" +}' +``` + +In mise.toml: +```toml +[settings] +# Simple hostname replacement +url_replacements = { + "github.com" = "nexus.mycompany.net", + "releases.hashicorp.com" = "artifactory.xmpl.com" +} + +# Regex patterns +url_replacements = { + "regex:^http://(.+)" = "https://$1", + "regex:https://github\\.com/([^/]+)/([^/]+)/releases/download/(.+)" = + "https://hub.corp.com/artifactory/github/$1/$2/$3" +} +``` + +## Simple Hostname Replacement + +For simple hostname-based mirroring, the key is the original hostname/domain to replace, +and the value is the replacement string. The replacement happens by searching and replacing +the pattern anywhere in the full URL string (including protocol, hostname, path, and query parameters). + +Examples: +- `github.com` -> `nexus.mycompany.net` replaces GitHub hostnames +- `https://github.com` -> `https://nexus.mycompany.net` with protocol excludes e.g. 'api.github.com' +- `https://github.com` -> `https://proxy.corp.com/github-mirror` replaces GitHub with corporate proxy +- `http://host.net` -> `https://host.net` replaces protocol from HTTP to HTTPS + +## Advanced Regex Replacement + +For more complex URL transformations, you can use regex patterns. When a key starts with `regex:`, +it is treated as a regular expression pattern that can match and transform any part of the URL. +The value can use capture groups from the regex pattern. + +### Regex Examples + +**1. 
Protocol Conversion (HTTP to HTTPS)** +```toml +[settings] +url_replacements = { + "regex:^http://(.+)" = "https://$1" +} +``` +This converts any HTTP URL to HTTPS by capturing everything after "http://" and replacing it with "https://". + +**2. GitHub Release Mirroring with Path Restructuring** +```toml +[settings] +url_replacements = { + "regex:https://github\\.com/([^/]+)/([^/]+)/releases/download/(.+)" = + "https://hub.corp.com/artifactory/github/$1/$2/$3" +} +``` +Transforms `https://github.com/owner/repo/releases/download/v1.0.0/file.tar.gz` +to `https://hub.corp.com/artifactory/github/owner/repo/v1.0.0/file.tar.gz` + +**3. Subdomain to Path Conversion** +```toml +[settings] +url_replacements = { + "regex:https://([^.]+)\\.cdn\\.example\\.com/(.+)" = + "https://unified-cdn.com/$1/$2" +} +``` +Converts subdomain-based URLs to path-based URLs on a unified CDN. + +**4. Multiple Replacement Patterns (processed in order)** +```toml +[settings] +url_replacements = { + "regex:https://github\\.com/microsoft/(.+)" = + "https://internal-mirror.com/microsoft/$1", + "regex:https://github\\.com/(.+)" = + "https://public-mirror.com/github/$1", + "releases.hashicorp.com" = "hashicorp-mirror.internal.com" +} +``` +First regex catches Microsoft repositories specifically, second catches all other GitHub URLs, +and the simple replacement handles HashiCorp. + +## Use Cases + +1. **Corporate Mirrors**: Replace public download URLs with internal corporate mirrors +2. **Custom Registries**: Redirect package downloads to custom or private registries +3. **Geographic Optimization**: Route downloads to geographically closer mirrors +4. **Protocol Changes**: Convert HTTP URLs to HTTPS or vice versa + +## Regex Syntax + +mise uses the Rust regex engine, which supports: +- `^` and `$` for anchors (start/end of string) +- `(.+)` for capture groups (use `$1`, `$2`, etc. 
in replacement) +- `[^/]+` for character classes (matches any character except `/`) +- `\\.` for escaping special characters (note: double backslash required in TOML) +- `*`, `+`, `?` for quantifiers +- `|` for alternation + +You can check on regex101.com if your regex works (see [example](https://regex101.com/r/rmcIE1/1)). +Full regex syntax documentation: <https://docs.rs/regex/latest/regex/#syntax> + +## Precedence and Matching + +- URL replacements are processed in the order they appear in the configuration (IndexMap insertion order) +- Both regex patterns (keys starting with `regex:`) and simple string replacements are processed in this same order +- The first matching pattern is used; subsequent patterns are ignored for that URL +- If no patterns match, the original URL is used unchanged + +## Security Considerations + +When using regex patterns, ensure your replacement URLs point to trusted sources, +as this feature can redirect tool downloads to arbitrary locations. \ No newline at end of file diff --git a/settings.toml b/settings.toml index e0035a0edb..38cb23fdf1 100644 --- a/settings.toml +++ b/settings.toml @@ -1191,138 +1191,7 @@ docs = ''' Map of URL patterns to replacement URLs. This feature supports both simple hostname replacements and advanced regex-based URL transformations for download mirroring and custom registries. 
-### Configuration Examples - -Environment variable (JSON format): -```bash -# Simple hostname replacement -export MISE_URL_REPLACEMENTS=' -{ - "github.com": "nexus.mycompany.net", - "releases.hashicorp.com": "artifactory.xmpl.com" -}' - -# Regex pattern (note the escaped backslashes in JSON) -export MISE_URL_REPLACEMENTS=' -{ - "regex:^http://(.+)" = "https://$1", - "regex:https://github\.com/([^/]+)/([^/]+)/releases/download/(.+)": - "https://hub.corp.com/artifactory/github/$1/$2/$3" -}' -``` - -In mise.toml: -```toml -[settings] -# Simple hostname replacement -url_replacements = { - "github.com" = "nexus.mycompany.net", - "releases.hashicorp.com" = "artifactory.xmpl.com" -} - -# Regex patterns -url_replacements = { - "regex:^http://(.+)" = "https://$1", - "regex:https://github\\.com/([^/]+)/([^/]+)/releases/download/(.+)" = - "https://hub.corp.com/artifactory/github/$1/$2/$3" -} -``` - -### Simple Hostname Replacement - -For simple hostname-based mirroring, the key is the original hostname/domain to replace, -and the value is the replacement string. The replacement happens by searching and replacing -the pattern anywhere in the full URL string (including protocol, hostname, path, and query parameters). - -Examples: -- `github.com` -> `nexus.mycompany.net` replaces GitHub hostnames -- `https://github.com` -> `https://nexus.mycompany.net` with protocol excludes e.g. 'api.github.com' -- `https://github.com` -> `https://proxy.corp.com/github-mirror` replaces GitHub with corporate proxy -- `http://host.net` -> `https://host.net` replaces protocol from HTTP to HTTPS - -### Advanced Regex Replacement - -For more complex URL transformations, you can use regex patterns. When a key starts with `regex:`, -it is treated as a regular expression pattern that can match and transform any part of the URL. -The value can use capture groups from the regex pattern. - -#### Regex Examples - -**1. 
Protocol Conversion (HTTP to HTTPS)** -```toml -[settings] -url_replacements = { - "regex:^http://(.+)" = "https://$1" -} -``` -This converts any HTTP URL to HTTPS by capturing everything after "http://" and replacing it with "https://". - -**2. GitHub Release Mirroring with Path Restructuring** -```toml -[settings] -url_replacements = { - "regex:https://github\\.com/([^/]+)/([^/]+)/releases/download/(.+)" = - "https://hub.corp.com/artifactory/github/$1/$2/$3" -} -``` -Transforms `https://github.com/owner/repo/releases/download/v1.0.0/file.tar.gz` -to `https://hub.corp.com/artifactory/github/owner/repo/v1.0.0/file.tar.gz` - -**3. Subdomain to Path Conversion** -```toml -[settings] -url_replacements = { - "regex:https://([^.]+)\\.cdn\\.example\\.com/(.+)" = - "https://unified-cdn.com/$1/$2" -} -``` -Converts subdomain-based URLs to path-based URLs on a unified CDN. - -**4. Multiple Replacement Patterns (processed in order)** -```toml -[settings] -url_replacements = { - "regex:https://github\\.com/microsoft/(.+)" = - "https://internal-mirror.com/microsoft/$1", - "regex:https://github\\.com/(.+)" = - "https://public-mirror.com/github/$1", - "releases.hashicorp.com" = "hashicorp-mirror.internal.com" -} -``` -First regex catches Microsoft repositories specifically, second catches all other GitHub URLs, -and the simple replacement handles HashiCorp. - -### Use Cases - -1. **Corporate Mirrors**: Replace public download URLs with internal corporate mirrors -2. **Custom Registries**: Redirect package downloads to custom or private registries -3. **Geographic Optimization**: Route downloads to geographically closer mirrors -4. **Protocol Changes**: Convert HTTP URLs to HTTPS or vice versa - -### Regex Syntax - -mise uses Rust regex engine which supports: -- `^` and `$` for anchors (start/end of string) -- `(.+)` for capture groups (use `$1`, `$2`, etc. 
in replacement) -- `[^/]+` for character classes (matches any character except `/`) -- `\\.` for escaping special characters (note: double backslash required in TOML) -- `*`, `+`, `?` for quantifiers -- `|` for alternation - -You can check on regex101.com if your regex works (see [example](https://regex101.com/r/rmcIE1/1)). -Full regex syntax documentation: - -### Precedence and Matching - -- URL replacements are processed in the order they appear in the configuration (IndexMap insertion order) -- Both regex patterns (keys starting with `regex:`) and simple string replacements are processed in this same order -- The first matching pattern is used; subsequent patterns are ignored for that URL -- If no patterns match, the original URL is used unchanged - -### Security Considerations - -When using regex patterns, ensure your replacement URLs point to trusted sources, -as this feature can redirect tool downloads to arbitrary locations. +See [URL Replacements](/url-replacements.html) for more information. ''' [use_file_shell_for_executable_tasks] From 904d4fde133d8c9b7984ef24697f57e0b39c92db Mon Sep 17 00:00:00 2001 From: ThomasTSteinbach Date: Sat, 6 Sep 2025 17:31:36 +0200 Subject: [PATCH 9/9] fix: documentation lint fixes --- docs/url-replacements.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/docs/url-replacements.md b/docs/url-replacements.md index 05eb2e52d7..4915b207e5 100644 --- a/docs/url-replacements.md +++ b/docs/url-replacements.md @@ -7,7 +7,6 @@ In some environments — such as enterprises or DMZs — these URLs may not be d URL replacements allow you to modify or redirect any URL that mise attempts to access, making it possible to use internal proxies, mirrors, or alternative sources as needed. 
- ## Configuration Examples Environment variable (JSON format): @@ -48,7 +47,7 @@ url_replacements = { ## Simple Hostname Replacement For simple hostname-based mirroring, the key is the original hostname/domain to replace, -and the value is the replacement string. The replacement happens by searching and replacing +and the value is the replacement string. The replacement happens by searching and replacing the pattern anywhere in the full URL string (including protocol, hostname, path, and query parameters). Examples: @@ -65,7 +64,7 @@ The value can use capture groups from the regex pattern. ### Regex Examples -**1. Protocol Conversion (HTTP to HTTPS)** +#### 1. Protocol Conversion (HTTP to HTTPS) ```toml [settings] url_replacements = { @@ -74,7 +73,7 @@ url_replacements = { ``` This converts any HTTP URL to HTTPS by capturing everything after "http://" and replacing it with "https://". -**2. GitHub Release Mirroring with Path Restructuring** +#### 2. GitHub Release Mirroring with Path Restructuring ```toml [settings] url_replacements = { @@ -85,7 +84,7 @@ url_replacements = { Transforms `https://github.com/owner/repo/releases/download/v1.0.0/file.tar.gz` to `https://hub.corp.com/artifactory/github/owner/repo/v1.0.0/file.tar.gz` -**3. Subdomain to Path Conversion** +#### 3. Subdomain to Path Conversion ```toml [settings] url_replacements = { @@ -95,7 +94,7 @@ url_replacements = { ``` Converts subdomain-based URLs to path-based URLs on a unified CDN. -**4. Multiple Replacement Patterns (processed in order)** +#### 4. Multiple Replacement Patterns (processed in order) ```toml [settings] url_replacements = { @@ -106,7 +105,7 @@ url_replacements = { "releases.hashicorp.com" = "hashicorp-mirror.internal.com" } ``` -First regex catches Microsoft repositories specifically, second catches all other GitHub URLs, +First regex catches Microsoft repositories specifically, second catches all other GitHub URLs, and the simple replacement handles HashiCorp. 
## Use Cases @@ -139,4 +138,4 @@ Full regex syntax documentation: ## Security Considerations When using regex patterns, ensure your replacement URLs point to trusted sources, -as this feature can redirect tool downloads to arbitrary locations. \ No newline at end of file +as this feature can redirect tool downloads to arbitrary locations.