diff --git a/Cargo.lock b/Cargo.lock index e17081c3eb..537a11a624 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -245,6 +245,7 @@ checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" name = "aqua-registry" version = "2026.5.3" dependencies = [ + "blake3", "expr-lang", "eyre", "heck", @@ -254,9 +255,10 @@ dependencies = [ "rkyv", "serde", "serde_yaml", + "siphasher", "strum 0.28.0", + "tempfile", "thiserror 2.0.18", - "tokio", "versions", ] diff --git a/crates/aqua-registry/Cargo.toml b/crates/aqua-registry/Cargo.toml index 67d30923ec..18b28d5f91 100644 --- a/crates/aqua-registry/Cargo.toml +++ b/crates/aqua-registry/Cargo.toml @@ -20,6 +20,7 @@ default = [] [dependencies] # Core dependencies +blake3 = "1" serde = { version = "1", features = ["derive"] } serde_yaml = "0.9" thiserror = "2" @@ -27,19 +28,14 @@ eyre = "0.6" indexmap = { version = "2", features = ["serde"] } itertools = "0.14" rkyv = { version = "0.8", features = ["unaligned"] } +siphasher = "1" strum = { version = "0.28", features = ["derive"] } +tempfile = "3" # Template parsing and evaluation expr-lang = "1" heck = "0.5" versions = { version = "7", features = ["serde"] } -# Async runtime -tokio = { version = "1", features = ["sync"] } - # Logging log = "0.4" - - -[dev-dependencies] -tokio = { version = "1", features = ["rt", "macros"] } diff --git a/crates/aqua-registry/README.md b/crates/aqua-registry/README.md index b7c1a8b630..2ac0689e86 100644 --- a/crates/aqua-registry/README.md +++ b/crates/aqua-registry/README.md @@ -1,14 +1,16 @@ # aqua-registry -Aqua registry backend for [mise](https://mise.en.dev). +Aqua registry primitives for [mise](https://mise.en.dev). -This crate provides support for the [Aqua](https://aquaproj.github.io/) registry format, allowing mise to install tools from the Aqua ecosystem. +This crate provides support for the [Aqua](https://aquaproj.github.io/) registry format. +It owns parsing, package lookup, package serialization codecs, and the on-disk source/compiled cache layout. mise owns remote fetching policy, baked registry fallback, settings, and integration behavior. ## Features - Parse and validate Aqua registry YAML files - Resolve package versions and platform-specific assets - Template string evaluation for dynamic asset URLs +- Source and compiled registry cache mechanics - Support for checksums, signatures, and provenance verification - Platform-aware asset resolution for cross-platform tool installation diff --git a/crates/aqua-registry/src/cache.rs b/crates/aqua-registry/src/cache.rs new file mode 100644 index 0000000000..b531cffd3b --- /dev/null +++ b/crates/aqua-registry/src/cache.rs @@ -0,0 +1,368 @@ +use crate::{AquaRegistryError, CompiledRegistry, ParsedRegistry, Result}; +use blake3::Hasher as Blake3Hasher; +use siphasher::sip::SipHasher; +use std::fs; +use std::hash::{Hash, Hasher}; +use std::io::Write; +use std::path::{Path, PathBuf}; +use std::time::{Duration, SystemTime}; + +const COMPILED_REGISTRY_CACHE_VERSION: &str = "v1"; + +#[derive(Debug, Clone)] +pub struct RegistryCache { + root: PathBuf, +} + +impl RegistryCache { + pub fn new(root: impl Into) -> Self { + Self { root: root.into() } + } + + pub fn source_path(&self, registry_url: &str) -> PathBuf { + self.root + .join("sources") + .join(format!("{}.yaml", registry_url_hash(registry_url))) + } + + pub fn read_source(&self, registry_url: &str) -> Result> { + let path = self.source_path(registry_url); + read_optional_to_string(&path) + } + + pub fn read_fresh_source( + &self, + registry_url: &str, + max_age: Duration, + ) -> Result> { + let path = self.source_path(registry_url); + if !path_is_fresh(&path, max_age)? { + return Ok(None); + } + read_optional_to_string(&path) + } + + pub fn write_source(&self, registry_url: &str, source: &str) -> Result<()> { + let path = self.source_path(registry_url); + let Some(parent) = path.parent() else { + return Err(AquaRegistryError::RegistryNotAvailable(format!( + "cached aqua registry source path has no parent: {}", + path.display() + ))); + }; + fs::create_dir_all(parent)?; + + let mut tmp = tempfile::NamedTempFile::with_prefix_in("registry-source.", parent)?; + tmp.write_all(source.as_bytes())?; + tmp.persist(&path).map_err(|err| err.error)?; + Ok(()) + } + + pub fn source_hash(source: &str) -> String { + source_hash(source) + } + + pub fn compiled_dir(&self, registry_url: &str, source_hash: &str) -> PathBuf { + self.root + .join("compiled") + .join(registry_url_hash(registry_url)) + .join(COMPILED_REGISTRY_CACHE_VERSION) + .join(source_hash) + } + + pub fn load_compiled(&self, registry_url: &str, source_hash: &str) -> Result { + CompiledRegistry::load(self.compiled_dir(registry_url, source_hash)) + } + + pub fn write_compiled( + &self, + registry_url: &str, + source_hash: &str, + registry: &ParsedRegistry, + ) -> Result { + let compiled_dir = self.compiled_dir(registry_url, source_hash); + if let Ok(existing) = CompiledRegistry::load(&compiled_dir) { + self.prune_stale_compiled(registry_url, source_hash); + return Ok(existing); + } + + let Some(parent) = compiled_dir.parent() else { + return Err(AquaRegistryError::RegistryNotAvailable(format!( + "compiled aqua registry cache path has no parent: {}", + compiled_dir.display() + ))); + }; + fs::create_dir_all(parent)?; + + let tmp_dir = tempfile::Builder::new() + .prefix(&format!("{source_hash}.tmp-")) + .tempdir_in(parent)?; + let tmp_path = tmp_dir.path().to_path_buf(); + + registry.write_compiled_cache(&tmp_path)?; + let tmp_path = tmp_dir.keep(); + + if let Ok(existing) = CompiledRegistry::load(&compiled_dir) { + cleanup_tmp_dir_for_existing_compiled_cache(&tmp_path, &compiled_dir)?; + self.prune_stale_compiled(registry_url, source_hash); + return Ok(existing); + } + + if compiled_dir.exists() { + remove_dir_all_if_exists(&compiled_dir)?; + } + + if let Err(err) = fs::rename(&tmp_path, &compiled_dir) { + if let Ok(existing) = CompiledRegistry::load(&compiled_dir) { + cleanup_tmp_dir_for_existing_compiled_cache(&tmp_path, &compiled_dir)?; + self.prune_stale_compiled(registry_url, source_hash); + return Ok(existing); + } + let _ = remove_dir_all_if_exists(&tmp_path); + return Err(err.into()); + } + + let compiled = CompiledRegistry::load(&compiled_dir)?; + self.prune_stale_compiled(registry_url, source_hash); + Ok(compiled) + } + + pub fn prune_stale_compiled(&self, registry_url: &str, source_hash: &str) { + let current_dir = self.compiled_dir(registry_url, source_hash); + prune_stale_compiled_registries(¤t_dir); + } +} + +fn registry_url_hash(registry_url: &str) -> String { + hash_to_str(®istry_url) +} + +fn source_hash(source: &str) -> String { + let mut hasher = Blake3Hasher::new(); + hasher.update(source.as_bytes()); + hasher.finalize().to_hex().to_string() +} + +fn hash_to_str(t: &T) -> String { + let mut s = SipHasher::new(); + t.hash(&mut s); + format!("{:x}", s.finish()) +} + +fn read_optional_to_string(path: &Path) -> Result> { + match fs::read_to_string(path) { + Ok(source) => Ok(Some(source)), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(err) => Err(err.into()), + } +} + +fn path_is_fresh(path: &Path, max_age: Duration) -> Result { + let Some(age) = path_age(path)? else { + return Ok(false); + }; + Ok(age < max_age) +} + +fn path_age(path: &Path) -> Result> { + let metadata = match fs::metadata(path) { + Ok(metadata) => metadata, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None), + Err(err) => return Err(err.into()), + }; + let modified = metadata.modified()?; + Ok(Some( + SystemTime::now() + .duration_since(modified) + .unwrap_or_default(), + )) +} + +fn prune_stale_compiled_registries(current_dir: &Path) { + let Some(parent) = current_dir.parent() else { + return; + }; + let Ok(entries) = fs::read_dir(parent) else { + return; + }; + + for entry in entries.flatten() { + let path = entry.path(); + if path == current_dir { + continue; + } + if entry.file_type().is_ok_and(|file_type| file_type.is_dir()) + && is_compiled_source_hash_dir(&path) + && let Err(err) = fs::remove_dir_all(&path) + { + log::debug!( + "failed to prune stale compiled aqua registry cache {}: {err}", + path.display() + ); + } + } +} + +fn is_compiled_source_hash_dir(path: &Path) -> bool { + path.file_name() + .and_then(|name| name.to_str()) + .is_some_and(|name| name.len() == 64 && name.bytes().all(|b| b.is_ascii_hexdigit())) +} + +fn cleanup_tmp_dir_for_existing_compiled_cache(tmp_dir: &Path, compiled_dir: &Path) -> Result<()> { + match fs::remove_dir_all(tmp_dir) { + Ok(()) => Ok(()), + Err(err) + if err.kind() == std::io::ErrorKind::NotFound + && CompiledRegistry::load(compiled_dir).is_ok() => + { + Ok(()) + } + Err(err) => Err(err.into()), + } +} + +fn remove_dir_all_if_exists(path: &Path) -> std::io::Result<()> { + match fs::remove_dir_all(path) { + Ok(()) => Ok(()), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()), + Err(err) => Err(err), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn registry_source(package_id: &str) -> String { + format!("packages:\n - name: {package_id}\n url: https://example.com/tool\n") + } + + #[test] + fn source_cache_reads_fresh_sources_and_skips_stale_sources() { + let temp = tempfile::tempdir().unwrap(); + let cache = RegistryCache::new(temp.path()); + let registry_url = "https://example.com/aqua-registry"; + + cache.write_source(registry_url, "packages: []").unwrap(); + + assert_eq!( + cache + .read_fresh_source(registry_url, Duration::from_secs(60)) + .unwrap() + .as_deref(), + Some("packages: []") + ); + assert!( + cache + .read_fresh_source(registry_url, Duration::ZERO) + .unwrap() + .is_none() + ); + } + + #[test] + fn source_cache_writes_atomically_and_overwrites_existing_source() { + let temp = tempfile::tempdir().unwrap(); + let cache = RegistryCache::new(temp.path()); + let registry_url = "https://example.com/aqua-registry"; + + cache.write_source(registry_url, "first").unwrap(); + cache.write_source(registry_url, "second").unwrap(); + + assert_eq!( + cache.read_source(registry_url).unwrap().as_deref(), + Some("second") + ); + assert!(cache.source_path(registry_url).is_file()); + } + + #[test] + fn compiled_cache_is_scoped_by_registry_url() { + let cache = RegistryCache::new("/cache"); + let source_hash = RegistryCache::source_hash("packages: []"); + let first = cache.compiled_dir("https://example.com/one", &source_hash); + let second = cache.compiled_dir("https://example.com/two", &source_hash); + + assert_ne!(first.parent(), second.parent()); + assert_eq!( + first.file_name().and_then(|name| name.to_str()), + Some(source_hash.as_str()) + ); + } + + #[test] + fn compiled_cache_writes_loads_and_prunes_stale_source_hash_siblings() { + let temp = tempfile::tempdir().unwrap(); + let cache = RegistryCache::new(temp.path()); + let registry_url = "https://example.com/aqua-registry"; + let first_source = registry_source("example/first"); + let second_source = registry_source("example/second"); + let first_hash = RegistryCache::source_hash(&first_source); + let second_hash = RegistryCache::source_hash(&second_source); + let first_registry = ParsedRegistry::parse_yaml(&first_source).unwrap(); + let second_registry = ParsedRegistry::parse_yaml(&second_source).unwrap(); + + cache + .write_compiled(registry_url, &first_hash, &first_registry) + .unwrap(); + let first_dir = cache.compiled_dir(registry_url, &first_hash); + assert!(first_dir.is_dir()); + + cache + .write_compiled(registry_url, &second_hash, &second_registry) + .unwrap(); + let second_dir = cache.compiled_dir(registry_url, &second_hash); + let loaded = cache.load_compiled(registry_url, &second_hash).unwrap(); + + assert!(second_dir.is_dir()); + assert!(!first_dir.exists()); + assert!(loaded.package("example/second").is_ok()); + } + + #[test] + fn compiled_cache_prune_skips_temp_directories() { + let temp = tempfile::tempdir().unwrap(); + let cache = RegistryCache::new(temp.path()); + let registry_url = "https://example.com/aqua-registry"; + let current_hash = RegistryCache::source_hash(®istry_source("example/current")); + let stale_hash = RegistryCache::source_hash(®istry_source("example/stale")); + let current_dir = cache.compiled_dir(registry_url, ¤t_hash); + let stale_dir = cache.compiled_dir(registry_url, &stale_hash); + let tmp_dir = current_dir + .parent() + .unwrap() + .join(format!("{current_hash}.tmp-in-progress")); + + fs::create_dir_all(¤t_dir).unwrap(); + fs::create_dir_all(&stale_dir).unwrap(); + fs::create_dir_all(&tmp_dir).unwrap(); + + cache.prune_stale_compiled(registry_url, ¤t_hash); + + assert!(current_dir.is_dir()); + assert!(!stale_dir.exists()); + assert!(tmp_dir.is_dir()); + } + + #[test] + fn compiled_temp_cleanup_treats_missing_temp_as_success_when_final_cache_exists() { + let temp = tempfile::tempdir().unwrap(); + let cache = RegistryCache::new(temp.path()); + let registry_url = "https://example.com/aqua-registry"; + let source = registry_source("example/tool"); + let source_hash = RegistryCache::source_hash(&source); + let registry = ParsedRegistry::parse_yaml(&source).unwrap(); + let compiled_dir = cache.compiled_dir(registry_url, &source_hash); + let missing_tmp_dir = compiled_dir.with_file_name(format!("{source_hash}.tmp-missing")); + + registry.write_compiled_cache(&compiled_dir).unwrap(); + + cleanup_tmp_dir_for_existing_compiled_cache(&missing_tmp_dir, &compiled_dir).unwrap(); + } + + #[test] + fn registry_url_hash_matches_existing_cache_layout() { + assert_eq!(registry_url_hash("foo"), "e1b19adfb2e348a2"); + } +} diff --git a/crates/aqua-registry/src/compiled.rs b/crates/aqua-registry/src/compiled.rs new file mode 100644 index 0000000000..696300bc2d --- /dev/null +++ b/crates/aqua-registry/src/compiled.rs @@ -0,0 +1,366 @@ +use crate::codec::{decode_package_rkyv, encode_package_rkyv}; +use crate::types::{AquaPackage, RegistryYaml}; +use crate::{AquaRegistryError, Result}; +use rkyv::rancor::Error as RkyvError; +use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize}; +use std::collections::{HashMap, HashSet}; +use std::fs; +use std::path::{Path, PathBuf}; + +const INDEX_FILE: &str = "index.rkyv"; +const PACKAGES_DIR: &str = "packages"; + +#[derive(Debug, Clone)] +pub struct CompiledRegistry { + root: PathBuf, + index: CompiledRegistryIndex, +} + +#[derive(Debug, Clone)] +pub struct ParsedRegistry { + packages: HashMap, + aliases: HashMap, +} + +#[derive(Debug, Clone, Archive, RkyvDeserialize, RkyvSerialize)] +struct CompiledRegistryIndex { + packages: HashMap, + aliases: HashMap, +} + +impl CompiledRegistry { + pub fn load(root: impl AsRef) -> Result { + let root = root.as_ref().to_path_buf(); + let index = read_index(&root)?; + validate_package_files(&root, &index)?; + Ok(Self { root, index }) + } + + pub fn package(&self, package_id: &str) -> Result { + let resolved_id = self + .index + .aliases + .get(package_id) + .map_or(package_id, String::as_str); + let filename = self + .index + .packages + .get(resolved_id) + .ok_or_else(|| AquaRegistryError::PackageNotFound(package_id.to_string()))?; + let path = self.root.join(PACKAGES_DIR).join(filename); + let bytes = fs::read(&path)?; + decode_package_rkyv(resolved_id, &bytes) + } +} + +impl ParsedRegistry { + pub fn parse_yaml(source: &str) -> Result { + let registry_yaml = serde_yaml::from_str::(source)?; + Self::from_registry_yaml(registry_yaml) + } + + pub fn package(&self, package_id: &str) -> Result { + let resolved_id = self + .aliases + .get(package_id) + .map_or(package_id, String::as_str); + self.packages + .get(resolved_id) + .cloned() + .ok_or_else(|| AquaRegistryError::PackageNotFound(package_id.to_string())) + } + + pub fn write_compiled_cache(&self, root: impl AsRef) -> Result { + let root = root.as_ref().to_path_buf(); + let index = write_compiled_index(self, &root)?; + Ok(CompiledRegistry { root, index }) + } + + fn from_registry_yaml(registry_yaml: RegistryYaml) -> Result { + let package_entries = registry_yaml + .packages + .into_iter() + .filter_map(|row| canonical_package_id(&row.package).map(|id| (id, row))) + .collect::>(); + + if package_entries.is_empty() { + return Err(AquaRegistryError::RegistryNotAvailable( + "aqua registry contains no packages".to_string(), + )); + } + + let canonical_ids = package_entries + .iter() + .map(|(id, _)| id.clone()) + .collect::>(); + let mut packages = HashMap::new(); + let mut aliases = HashMap::new(); + + for (id, row) in package_entries { + for alias in &row.aliases { + if alias != &id && !canonical_ids.contains(alias.as_str()) { + aliases.insert(alias.clone(), id.clone()); + } + } + packages.insert(id, row.package); + } + + Ok(Self { packages, aliases }) + } +} + +fn read_index(root: &Path) -> Result { + let path = root.join(INDEX_FILE); + let bytes = fs::read(&path)?; + rkyv::from_bytes::(&bytes).map_err(|err| { + AquaRegistryError::RegistryNotAvailable(format!( + "failed to decode compiled aqua registry index {} from rkyv: {err}", + path.display() + )) + }) +} + +fn validate_package_files(root: &Path, index: &CompiledRegistryIndex) -> Result<()> { + let packages_dir = root.join(PACKAGES_DIR); + for filename in index.packages.values() { + let path = packages_dir.join(filename); + if !path.is_file() { + return Err(AquaRegistryError::RegistryNotAvailable(format!( + "compiled aqua registry package file is missing: {}", + path.display() + ))); + } + } + Ok(()) +} + +fn write_index(root: &Path, index: &CompiledRegistryIndex) -> Result<()> { + let path = root.join(INDEX_FILE); + let bytes = rkyv::to_bytes::(index) + .map(|bytes| bytes.to_vec()) + .map_err(|err| { + AquaRegistryError::RegistryNotAvailable(format!( + "failed to encode compiled aqua registry index {} as rkyv: {err}", + path.display() + )) + })?; + fs::write(path, bytes)?; + Ok(()) +} + +fn write_compiled_index(registry: &ParsedRegistry, root: &Path) -> Result { + let packages_dir = root.join(PACKAGES_DIR); + fs::create_dir_all(&packages_dir)?; + + let mut used_filenames = HashSet::new(); + let mut packages = HashMap::new(); + + for (id, package) in ®istry.packages { + let filename = package_filename(id, &mut used_filenames); + let path = packages_dir.join(&filename); + let content = encode_package_rkyv(package)?; + fs::write(path, content)?; + packages.insert(id.clone(), filename); + } + + let index = CompiledRegistryIndex { + packages, + aliases: registry.aliases.clone(), + }; + write_index(root, &index)?; + Ok(index) +} + +fn canonical_package_id(package: &AquaPackage) -> Option { + package + .name + .clone() + .or_else(|| { + if package.repo_owner.is_empty() || package.repo_name.is_empty() { + None + } else { + Some(format!("{}/{}", package.repo_owner, package.repo_name)) + } + }) + .or_else(|| package.path.clone()) +} + +fn package_filename(id: &str, used_filenames: &mut HashSet) -> String { + let stem = package_filename_stem(id); + let mut filename = format!("{stem}.rkyv"); + let mut suffix = 2; + while !used_filenames.insert(filename.clone()) { + filename = format!("{stem}-{suffix}.rkyv"); + suffix += 1; + } + filename +} + +fn package_filename_stem(id: &str) -> String { + let sanitized = sanitize_filename_prefix(id); + let hash = fnv1a64(id); + format!("{sanitized}-{hash:016x}") +} + +fn sanitize_filename_prefix(id: &str) -> String { + let mut prefix = String::new(); + for byte in id.bytes() { + let c = byte as char; + if c.is_ascii_alphanumeric() { + prefix.push(c.to_ascii_lowercase()); + } else { + prefix.push('_'); + } + if prefix.len() >= 80 { + break; + } + } + if prefix.is_empty() { + "package".to_string() + } else { + prefix + } +} + +/// Hashes the canonical package ID with FNV-1a 64-bit to keep compiled cache +/// filenames deterministic. The sanitized ID prefix is only for readability. +fn fnv1a64(value: &str) -> u64 { + let mut hash = 0xcbf29ce484222325u64; + for byte in value.as_bytes() { + hash ^= u64::from(*byte); + hash = hash.wrapping_mul(0x100000001b3); + } + hash +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::{SystemTime, UNIX_EPOCH}; + + #[test] + fn compiles_flat_registry_cache_and_resolves_aliases() { + let root = temp_cache_dir("compiled-aqua-registry"); + let source = r#" +packages: + - type: http + name: example/canonical-tool + repo_owner: example + repo_name: tool + url: https://example.com/tool + aliases: + - name: example/tool-alias + version_overrides: + - aliases: + - name: example/nested-alias +"#; + + let registry = compile_registry(source, &root); + let package = registry.package("example/tool-alias").unwrap(); + + assert_eq!(package.name.as_deref(), Some("example/canonical-tool")); + assert_eq!(package.repo_owner, "example"); + assert_eq!(package.repo_name, "tool"); + assert!(registry.package("example/canonical-tool").is_ok()); + assert!(matches!( + registry.package("example/tool"), + Err(AquaRegistryError::PackageNotFound(_)) + )); + assert!(matches!( + registry.package("example/nested-alias"), + Err(AquaRegistryError::PackageNotFound(_)) + )); + assert!(root.join(INDEX_FILE).exists()); + + let packages_dir = root.join(PACKAGES_DIR); + let files = fs::read_dir(&packages_dir) + .unwrap() + .collect::, _>>() + .unwrap(); + assert_eq!(files.len(), 1); + assert!(files[0].file_type().unwrap().is_file()); + + fs::remove_dir_all(root).unwrap(); + } + + #[test] + fn parsed_registry_resolves_packages_before_cache_is_written() { + let source = r#" +packages: + - type: http + name: example/canonical-tool + url: https://example.com/tool + aliases: + - name: example/tool-alias +"#; + + let registry = ParsedRegistry::parse_yaml(source).unwrap(); + let package = registry.package("example/tool-alias").unwrap(); + + assert_eq!(package.name.as_deref(), Some("example/canonical-tool")); + assert!(matches!( + registry.package("example/missing"), + Err(AquaRegistryError::PackageNotFound(_)) + )); + } + + #[test] + fn loads_compiled_registry_without_reparsing_yaml() { + let root = temp_cache_dir("compiled-aqua-registry-load"); + let source = r#" +packages: + - type: http + name: example/named-tool + url: https://example.com/tool +"#; + + compile_registry(source, &root); + let registry = CompiledRegistry::load(&root).unwrap(); + let package = registry.package("example/named-tool").unwrap(); + + assert_eq!(package.name.as_deref(), Some("example/named-tool")); + + fs::remove_dir_all(root).unwrap(); + } + + #[test] + fn load_rejects_missing_package_blob() { + let root = temp_cache_dir("compiled-aqua-registry-missing-package"); + let source = r#" +packages: + - type: http + name: example/missing-package + url: https://example.com/tool +"#; + + compile_registry(source, &root); + let packages_dir = root.join(PACKAGES_DIR); + let package_file = fs::read_dir(&packages_dir) + .unwrap() + .next() + .unwrap() + .unwrap() + .path(); + fs::remove_file(package_file).unwrap(); + + let err = CompiledRegistry::load(&root).unwrap_err(); + assert!(matches!(err, AquaRegistryError::RegistryNotAvailable(_))); + + fs::remove_dir_all(root).unwrap(); + } + + fn compile_registry(source: &str, root: &Path) -> CompiledRegistry { + ParsedRegistry::parse_yaml(source) + .unwrap() + .write_compiled_cache(root) + .unwrap() + } + + fn temp_cache_dir(name: &str) -> PathBuf { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + std::env::temp_dir().join(format!("{name}-{nanos}")) + } +} diff --git a/crates/aqua-registry/src/lib.rs b/crates/aqua-registry/src/lib.rs index 0616bb404e..7a0febabf4 100644 --- a/crates/aqua-registry/src/lib.rs +++ b/crates/aqua-registry/src/lib.rs @@ -1,16 +1,20 @@ //! Aqua Registry //! //! This crate provides functionality for working with Aqua package registry files. -//! It can load registry data from baked-in files, local repositories, or remote HTTP sources. +//! It handles parsing registry YAML, looking up packages, and managing compiled +//! registry cache files. Fetching policy, remote fallback behavior, and baked-in +//! registry integration live in mise. +mod cache; mod codec; -mod registry; +mod compiled; mod template; pub mod types; // Re-export only what's needed by the main mise crate +pub use cache::RegistryCache; pub use codec::{decode_package_rkyv, encode_package_rkyv}; -pub use registry::{AquaRegistry, DefaultRegistryFetcher, FileCacheStore, NoOpCacheStore}; +pub use compiled::{CompiledRegistry, ParsedRegistry}; pub use types::{ AquaChecksum, AquaChecksumType, AquaCosign, AquaFile, AquaMinisignType, AquaPackage, AquaPackageType, AquaVar, RegistryYaml, @@ -36,44 +40,3 @@ pub enum AquaRegistryError { } pub type Result = std::result::Result; - -/// Configuration for the Aqua registry -#[derive(Debug, Clone)] -pub struct AquaRegistryConfig { - /// Path to cache directory for cloned repositories - pub cache_dir: std::path::PathBuf, - /// URL of the registry repository (if None, only baked registry will be used) - pub registry_url: Option, - /// Whether to use the baked-in registry - pub use_baked_registry: bool, - /// Whether to skip network operations (prefer offline mode) - pub prefer_offline: bool, -} - -impl Default for AquaRegistryConfig { - fn default() -> Self { - Self { - cache_dir: std::env::temp_dir().join("aqua-registry"), - registry_url: Some("https://github.com/aquaproj/aqua-registry".to_string()), - use_baked_registry: true, - prefer_offline: false, - } - } -} - -/// Trait for fetching aqua packages from various sources -#[allow(async_fn_in_trait)] -pub trait RegistryFetcher { - /// Fetch and parse a package definition for the given package ID. - async fn fetch_package(&self, package_id: &str) -> Result; -} - -/// Trait for caching registry data -pub trait CacheStore { - /// Check if cached data exists and is fresh - fn is_fresh(&self, key: &str) -> bool; - /// Store data in cache - fn store(&self, key: &str, data: &[u8]) -> std::io::Result<()>; - /// Retrieve data from cache - fn retrieve(&self, key: &str) -> std::io::Result>>; -} diff --git a/crates/aqua-registry/src/registry.rs b/crates/aqua-registry/src/registry.rs deleted file mode 100644 index 475a18401d..0000000000 --- a/crates/aqua-registry/src/registry.rs +++ /dev/null @@ -1,206 +0,0 @@ -use crate::types::{AquaPackage, RegistryYaml}; -use crate::{AquaRegistryConfig, AquaRegistryError, CacheStore, RegistryFetcher, Result}; -use std::collections::HashMap; -use std::path::PathBuf; -use std::sync::LazyLock; -use tokio::sync::Mutex; - -/// The main Aqua registry implementation -#[derive(Debug)] -pub struct AquaRegistry -where - F: RegistryFetcher, - C: CacheStore, -{ - #[allow(dead_code)] - config: AquaRegistryConfig, - fetcher: F, - #[allow(dead_code)] - cache_store: C, - #[allow(dead_code)] - repo_exists: bool, -} - -/// Default implementation of RegistryFetcher -#[derive(Debug, Clone)] -pub struct DefaultRegistryFetcher { - config: AquaRegistryConfig, -} - -/// No-op implementation of CacheStore -#[derive(Debug, Clone, Default)] -pub struct NoOpCacheStore; - -/// File-based cache store implementation -#[derive(Debug, Clone)] -pub struct FileCacheStore { - cache_dir: PathBuf, -} - -impl AquaRegistry { - /// Create a new AquaRegistry with the given configuration - pub fn new(config: AquaRegistryConfig) -> Self { - let repo_exists = Self::check_repo_exists(&config.cache_dir); - let fetcher = DefaultRegistryFetcher { - config: config.clone(), - }; - Self { - config, - fetcher, - cache_store: NoOpCacheStore, - repo_exists, - } - } - - /// Create a new AquaRegistry with custom fetcher and cache store - pub fn with_fetcher_and_cache( - config: AquaRegistryConfig, - fetcher: F, - cache_store: C, - ) -> AquaRegistry - where - F: RegistryFetcher, - C: CacheStore, - { - let repo_exists = Self::check_repo_exists(&config.cache_dir); - AquaRegistry { - config, - fetcher, - cache_store, - repo_exists, - } - } - - fn check_repo_exists(cache_dir: &std::path::Path) -> bool { - cache_dir.join(".git").exists() - } -} - -impl AquaRegistry -where - F: RegistryFetcher, - C: CacheStore, -{ - /// Get a package definition by ID - pub async fn package(&self, id: &str) -> Result { - static CACHE: LazyLock>> = - LazyLock::new(|| Mutex::new(HashMap::new())); - - if let Some(pkg) = CACHE.lock().await.get(id) { - return Ok(pkg.clone()); - } - - let mut pkg = self.fetcher.fetch_package(id).await?; - - pkg.setup_version_filter()?; - CACHE.lock().await.insert(id.to_string(), pkg.clone()); - Ok(pkg) - } -} - -impl RegistryFetcher for DefaultRegistryFetcher { - async fn fetch_package(&self, package_id: &str) -> Result { - let path_id = package_id - .split('/') - .collect::>() - .join(std::path::MAIN_SEPARATOR_STR); - let path = self - .config - .cache_dir - .join("pkgs") - .join(&path_id) - .join("registry.yaml"); - - // Try to read from local repository first - if self.config.cache_dir.join(".git").exists() && path.exists() { - log::trace!("reading aqua-registry for {package_id} from repo at {path:?}"); - let contents = std::fs::read_to_string(&path)?; - let registry = serde_yaml::from_str::(&contents)?; - return registry - .packages - .into_iter() - .next() - .map(|row| row.package) - .ok_or_else(|| AquaRegistryError::PackageNotFound(package_id.to_string())); - } - - Err(AquaRegistryError::RegistryNotAvailable(format!( - "no aqua-registry found for {package_id}" - ))) - } -} - -impl CacheStore for NoOpCacheStore { - fn is_fresh(&self, _key: &str) -> bool { - false - } - - fn store(&self, _key: &str, _data: &[u8]) -> std::io::Result<()> { - Ok(()) - } - - fn retrieve(&self, _key: &str) -> std::io::Result>> { - Ok(None) - } -} - -impl FileCacheStore { - pub fn new(cache_dir: PathBuf) -> Self { - Self { cache_dir } - } -} - -impl CacheStore for FileCacheStore { - fn is_fresh(&self, key: &str) -> bool { - // Check if cache entry exists and is less than a week old - #[allow(clippy::collapsible_if)] - if let Ok(metadata) = std::fs::metadata(self.cache_dir.join(key)) { - if let Ok(modified) = metadata.modified() { - let age = std::time::SystemTime::now() - .duration_since(modified) - .unwrap_or_default(); - return age < std::time::Duration::from_secs(7 * 24 * 60 * 60); // 1 week - } - } - false - } - - fn store(&self, key: &str, data: &[u8]) -> std::io::Result<()> { - let path = self.cache_dir.join(key); - if let Some(parent) = path.parent() { - std::fs::create_dir_all(parent)?; - } - std::fs::write(path, data) - } - - fn retrieve(&self, key: &str) -> std::io::Result>> { - let path = self.cache_dir.join(key); - match std::fs::read(path) { - Ok(data) => Ok(Some(data)), - Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None), - Err(e) => Err(e), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[tokio::test] - async fn test_registry_creation() { - let config = AquaRegistryConfig::default(); - let registry = AquaRegistry::new(config); - - // This should not panic - registry should be created successfully - drop(registry); - } - - #[test] - fn test_cache_store() { - let cache = NoOpCacheStore; - assert!(!cache.is_fresh("test")); - assert!(cache.store("test", b"data").is_ok()); - assert!(cache.retrieve("test").unwrap().is_none()); - } -} diff --git a/docs/dev-tools/backends/aqua.md b/docs/dev-tools/backends/aqua.md index 3b0f2f03f4..7fc15931e3 100644 --- a/docs/dev-tools/backends/aqua.md +++ b/docs/dev-tools/backends/aqua.md @@ -22,6 +22,28 @@ always require plugins like asdf/vfox. The code for this is inside the mise repository at [`./src/backend/aqua.rs`](https://github.com/jdx/mise/blob/main/src/backend/aqua.rs). +## Custom Registry + +Set [`aqua.registry_url`](/configuration/settings.html#aqua-registry_url) to use a custom aqua +registry repository: + +```toml +[settings] +aqua.registry_url = "https://github.com/my-org/aqua-registry" +``` + +mise downloads `registry.yaml` from the repository root, falling back to `registry.yml` if needed. +Downloaded registry source is cached under `MISE_CACHE_DIR` for +[`aqua.registry_cache_ttl`](/configuration/settings.html#aqua-registry_cache_ttl), which defaults +to one week. + +After a refreshed registry source is downloaded, mise hashes the source and uses that hash in the +compiled registry cache path. When a new compiled cache is successfully loaded or written, older +compiled caches for the same registry URL are pruned. + +When `aqua.baked_registry` is enabled, the baked-in registry remains a fallback for packages missing +from the custom registry. + ## Usage The following installs the latest version of ripgrep and sets it as the active version on PATH: diff --git a/e2e/backend/test_aqua_custom_registry_cache b/e2e/backend/test_aqua_custom_registry_cache new file mode 100644 index 0000000000..c8ce597ee8 --- /dev/null +++ b/e2e/backend/test_aqua_custom_registry_cache @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +export MISE_EXPERIMENTAL=1 +export MISE_LOCKFILE=1 +export MISE_AQUA_BAKED_REGISTRY=0 +export MISE_AQUA_REGISTRY_URL="file://$ROOT/vendor/aqua-registry" + +cat <<'EOF_MISE' >mise.toml +[tools] +"aqua:BurntSushi/ripgrep" = "14.1.1" +EOF_MISE + +output=$(MISE_TIMINGS=1 mise lock --platform linux-x64 2>&1) +printf '%s\n' "$output" + +[[ $output == *"mise::aqua::aqua_registry_wrapper::aqua_registry::parse_yaml"* ]] || + fail "expected custom aqua registry parse timing in output" +[[ $output == *"mise::aqua::aqua_registry_wrapper::aqua_registry::write_compiled_cache"* ]] || + fail "expected custom aqua registry cache generation timing in output" + +assert_contains "cat mise.lock" 'backend = "aqua:BurntSushi/ripgrep"' +assert_contains "cat mise.lock" '[[tools."aqua:BurntSushi/ripgrep"]]' + +second_output=$(MISE_TIMINGS=1 mise lock --platform linux-x64 2>&1) +printf '%s\n' "$second_output" + +[[ $second_output != *"mise::aqua::aqua_registry_wrapper::aqua_registry::write_compiled_cache"* ]] || + fail "expected unchanged registry source hash to reuse the compiled cache" diff --git a/e2e/backend/test_aqua_vars b/e2e/backend/test_aqua_vars index 55b4340b2c..b202ce2450 100644 --- a/e2e/backend/test_aqua_vars +++ b/e2e/backend/test_aqua_vars @@ -7,11 +7,12 @@ export MISE_AQUA_BAKED_REGISTRY=0 detect_platform REGISTRY_DIR="$PWD/aqua-registry-local" -mkdir -p "$REGISTRY_DIR/pkgs/example/vars-tool" +mkdir -p "$REGISTRY_DIR" -cat <<'EOF_REGISTRY' >"$REGISTRY_DIR/pkgs/example/vars-tool/registry.yaml" +cat <<'EOF_REGISTRY' >"$REGISTRY_DIR/registry.yml" packages: - type: http + name: example/vars-tool supported_envs: - darwin - linux @@ -25,13 +26,6 @@ packages: required: true EOF_REGISTRY -( - cd "$REGISTRY_DIR" - git init -q - git add pkgs/example/vars-tool/registry.yaml - git commit -qm "init local aqua registry" -) - export MISE_AQUA_REGISTRY_URL="file://$REGISTRY_DIR" cat <<'EOF_MISE' >mise.toml @@ -39,7 +33,7 @@ cat <<'EOF_MISE' >mise.toml "aqua:example/vars-tool" = { version = "1.0.0", fixture_version = "1.0.0" } EOF_MISE -mise install +MISE_TIMINGS=1 mise install assert_contains "mise x -- hello-world" "hello world" cat <<'EOF_MISE' >mise.toml @@ -47,7 +41,7 @@ cat <<'EOF_MISE' >mise.toml "aqua:example/vars-tool" = { version = "1.0.0", fixture_version = "2.0.0" } EOF_MISE -mise install --force aqua:example/vars-tool@1.0.0 +MISE_TIMINGS=1 mise install --force aqua:example/vars-tool@1.0.0 assert_contains "mise x -- hello-world" "hello world 2.0.0" rm -f mise.lock @@ -56,7 +50,7 @@ cat <<'EOF_MISE' >mise.toml "aqua:example/vars-tool" = { version = "1.0.0", vars = { fixture_version = "2.0.0" } } EOF_MISE -mise lock --platform "$MISE_PLATFORM" +MISE_TIMINGS=1 mise lock --platform "$MISE_PLATFORM" assert_contains "cat mise.lock" '"vars.fixture_version" = "2.0.0"' assert_contains "cat mise.lock" 'url = "https://mise.en.dev/test-fixtures/hello-world-2.0.0.tar.gz"' cp mise.lock nested-vars.lock @@ -66,7 +60,7 @@ cat <<'EOF_MISE' >mise.toml "aqua:example/vars-tool" = { version = "1.0.0", fixture_version = "2.0.0" } EOF_MISE -mise lock --platform "$MISE_PLATFORM" +MISE_TIMINGS=1 mise lock --platform "$MISE_PLATFORM" assert_succeed "cmp mise.lock nested-vars.lock" cat <<'EOF_MISE' >mise.toml diff --git a/e2e/lockfile/test_lockfile_aqua_cross_platform_override b/e2e/lockfile/test_lockfile_aqua_cross_platform_override index aeaf704a3d..6641e2d38d 100755 --- a/e2e/lockfile/test_lockfile_aqua_cross_platform_override +++ b/e2e/lockfile/test_lockfile_aqua_cross_platform_override @@ -21,11 +21,12 @@ TARGET_PLATFORM="windows-x64" TARGET_AQUA_OS="windows" REGISTRY_DIR="$PWD/aqua-registry-local" -mkdir -p "$REGISTRY_DIR/pkgs/example/testtool" +mkdir -p "$REGISTRY_DIR" -cat <"$REGISTRY_DIR/pkgs/example/testtool/registry.yaml" +cat <"$REGISTRY_DIR/registry.yaml" packages: - type: http + name: example/testtool supported_envs: - linux - darwin @@ -37,13 +38,6 @@ packages: format: pkg EOF_REGISTRY -( - cd "$REGISTRY_DIR" - git init -q - git add pkgs/example/testtool/registry.yaml - git commit -qm "init local aqua registry" -) - export MISE_AQUA_BAKED_REGISTRY=0 export MISE_AQUA_REGISTRY_URL="file://$REGISTRY_DIR" @@ -52,7 +46,8 @@ cat <<'EOF_MISE' >mise.toml "aqua:example/testtool" = "1.0.0" EOF_MISE -output=$(mise lock --platform "$TARGET_PLATFORM" 2>&1) +output=$(MISE_TIMINGS=1 mise lock --platform "$TARGET_PLATFORM" 2>&1) +printf '%s\n' "$output" assert_contains "echo '$output'" "Targeting 1 platform(s)" assert_contains "echo '$output'" "$TARGET_PLATFORM" diff --git a/schema/mise.json b/schema/mise.json index c340a8a46f..74a7e0630e 100644 --- a/schema/mise.json +++ b/schema/mise.json @@ -506,8 +506,12 @@ "description": "Use minisign to verify aqua tool signatures.", "type": "boolean" }, + "registry_cache_ttl": { + "description": "How long to cache downloaded aqua registry source files.", + "type": "string" + }, "registry_url": { - "description": "URL to fetch aqua registry from.", + "description": "URL of an aqua registry repository to fetch.", "type": "string" }, "slsa": { diff --git a/settings.toml b/settings.toml index 1043145675..483c6ebccf 100644 --- a/settings.toml +++ b/settings.toml @@ -125,14 +125,38 @@ description = "Use minisign to verify aqua tool signatures." env = "MISE_AQUA_MINISIGN" type = "Bool" +[aqua.registry_cache_ttl] +default_docs = "1w" +description = "How long to cache downloaded aqua registry source files." +docs = """ +How long downloaded aqua registry source files remain fresh before mise re-downloads them. + +When the downloaded source changes, mise writes the new source cache atomically, compiles a new +source-hash-scoped registry cache, and prunes older compiled caches for that registry URL after +the new compiled cache is available. + +Set to `0s` to re-download remote registries every time. +""" +env = "MISE_AQUA_REGISTRY_CACHE_TTL" +optional = true +type = "Duration" + [aqua.registry_url] -description = "URL to fetch aqua registry from." +description = "URL of an aqua registry repository to fetch." docs = """ -URL to fetch aqua registry from. This is used to install tools from the aqua registry. +URL of an aqua registry repository to fetch. mise downloads `registry.yaml` from the repository +root and falls back to `registry.yml` if needed. + +Downloaded registries are cached according to `aqua.registry_cache_ttl`, which defaults to one +week. To refresh sooner, run `mise cache clear`, set `aqua.registry_cache_ttl = "0s"`, or change +`MISE_CACHE_DIR` to use a different cache location. -If this is set, the baked-in aqua registry is not used. +If this is set, mise checks the configured registry first. When `aqua.baked_registry` is enabled, +the baked-in aqua registry remains a fallback for packages missing from the configured registry. -By default, the official aqua registry is used: https://github.com/aquaproj/aqua-registry +By default, mise uses the baked-in official aqua registry when `aqua.baked_registry` is enabled. +If the baked registry is disabled, mise downloads the official registry: +https://github.com/aquaproj/aqua-registry """ env = "MISE_AQUA_REGISTRY_URL" optional = true diff --git a/src/aqua/aqua_registry_wrapper.rs b/src/aqua/aqua_registry_wrapper.rs index f0e132a6a6..523732bd67 100644 --- a/src/aqua/aqua_registry_wrapper.rs +++ b/src/aqua/aqua_registry_wrapper.rs @@ -1,88 +1,83 @@ use crate::config::Settings; -use crate::git::{CloneOptions, Git}; -use crate::{dirs, duration::WEEKLY, file}; -use aqua_registry::{ - AquaRegistry, AquaRegistryConfig, AquaRegistryError, NoOpCacheStore, RegistryFetcher, -}; +use crate::http::HTTP; +use crate::{dirs, duration}; +use aqua_registry::{AquaRegistryError, CompiledRegistry, ParsedRegistry, RegistryCache}; use eyre::Result; +use reqwest::header::{ACCEPT, HeaderMap, HeaderValue}; use std::collections::HashMap; use std::path::PathBuf; -use std::sync::LazyLock as Lazy; -use tokio::sync::Mutex; +use std::sync::{Arc, LazyLock as Lazy}; +use tokio::sync::{Mutex, OnceCell}; +use url::Url; static AQUA_REGISTRY_PATH: Lazy = Lazy::new(|| dirs::CACHE.join("aqua-registry")); static AQUA_DEFAULT_REGISTRY_URL: &str = "https://github.com/aquaproj/aqua-registry"; +pub(crate) const DEFAULT_AQUA_REGISTRY_CACHE_TTL: duration::Duration = duration::WEEKLY; -pub static AQUA_REGISTRY: Lazy = Lazy::new(|| { - MiseAquaRegistry::standard().unwrap_or_else(|err| { - warn!("failed to initialize aqua registry: {err:?}"); - MiseAquaRegistry::default() - }) -}); +pub static AQUA_REGISTRY: Lazy = Lazy::new(AquaRegistry::from_settings); -/// Wrapper around the aqua-registry crate that provides mise-specific functionality #[derive(Debug)] -pub struct MiseAquaRegistry { - inner: AquaRegistry, - #[allow(dead_code)] - path: PathBuf, - #[allow(dead_code)] - repo_exists: bool, -} - -impl Default for MiseAquaRegistry { - fn default() -> Self { - let config = AquaRegistryConfig::default(); - let inner = aqua_registry(config.clone()); - Self { - inner, - path: config.cache_dir, - repo_exists: false, - } - } +pub struct AquaRegistry { + registry_url: Option, + use_baked_registry: bool, + prefer_offline: bool, + source_cache_ttl: duration::Duration, + cache: RegistryCache, + registry: Arc>, String>>>, } -impl MiseAquaRegistry { - pub fn standard() -> Result { +impl AquaRegistry { + fn from_settings() -> Self { let path = AQUA_REGISTRY_PATH.clone(); - let repo = Git::new(&path); let settings = Settings::get(); let registry_url = - settings - .aqua - .registry_url - .as_deref() - .or(if settings.aqua.baked_registry { - None - } else { - Some(AQUA_DEFAULT_REGISTRY_URL) - }); + settings.aqua.registry_url.clone().or_else(|| { + (!settings.aqua.baked_registry).then(|| AQUA_DEFAULT_REGISTRY_URL.into()) + }); - if let Some(registry_url) = registry_url { - if repo.exists() { - fetch_latest_repo(&repo)?; - } else { - info!("cloning aqua registry from {registry_url} to {path:?}"); - repo.clone(registry_url, CloneOptions::default())?; - } - } + Self::new( + path, + registry_url, + settings.aqua.baked_registry, + settings.prefer_offline(), + settings.aqua_registry_cache_ttl(), + ) + } - let config = AquaRegistryConfig { - cache_dir: path.clone(), - registry_url: registry_url.map(|s| s.to_string()), - use_baked_registry: settings.aqua.baked_registry, - prefer_offline: settings.prefer_offline(), - }; + fn new( + cache_dir: PathBuf, + registry_url: Option, + use_baked_registry: bool, + prefer_offline: bool, + source_cache_ttl: duration::Duration, + ) -> Self { + Self { + registry_url, + use_baked_registry, + prefer_offline, + source_cache_ttl, + cache: RegistryCache::new(cache_dir), + registry: Arc::new(OnceCell::new()), + } + } +} - let inner = aqua_registry(config); +#[derive(Debug)] +enum ActiveRegistry { + Compiled(CompiledRegistry), + Parsed(Arc), +} - Ok(Self { - inner, - path, - repo_exists: repo.exists(), - }) +impl ActiveRegistry { + fn package(&self, package_id: &str) -> aqua_registry::Result { + match self { + Self::Compiled(registry) => registry.package(package_id), + Self::Parsed(registry) => registry.package(package_id), + } } +} +impl AquaRegistry { pub async fn package(&self, id: &str) -> Result { static CACHE: Lazy>> = Lazy::new(|| Mutex::new(HashMap::new())); @@ -91,59 +86,27 @@ impl MiseAquaRegistry { return Ok(pkg.clone()); } - let pkg = self.inner.package(id).await?; + let mut pkg = self.fetch_package(id).await?; + pkg.setup_version_filter()?; CACHE.lock().await.insert(id.to_string(), pkg.clone()); Ok(pkg) } -} -#[derive(Debug, Clone)] -struct MiseRegistryFetcher { - config: AquaRegistryConfig, -} - -fn aqua_registry(config: AquaRegistryConfig) -> AquaRegistry { - AquaRegistry::with_fetcher_and_cache( - config.clone(), - MiseRegistryFetcher { config }, - NoOpCacheStore, - ) -} - -impl RegistryFetcher for MiseRegistryFetcher { async fn fetch_package(&self, package_id: &str) -> aqua_registry::Result { - if self.config.use_baked_registry - && !self.config.cache_dir.join(".git").exists() - && let Some(package) = super::standard_registry::package(package_id) - { - log::trace!("reading baked-in aqua package for {package_id}"); - return package; - } - - let path_id = package_id - .split('/') - .collect::>() - .join(std::path::MAIN_SEPARATOR_STR); - let path = self - .config - .cache_dir - .join("pkgs") - .join(&path_id) - .join("registry.yaml"); - - if self.config.cache_dir.join(".git").exists() && path.exists() { - log::trace!("reading aqua-registry for {package_id} from repo at {path:?}"); - let contents = std::fs::read_to_string(&path)?; - let registry = serde_yaml::from_str::(&contents)?; - return registry - .packages - .into_iter() - .next() - .map(|row| row.package) - .ok_or_else(|| AquaRegistryError::PackageNotFound(package_id.to_string())); + match self.registry().await { + Ok(Some(registry)) => match registry.package(package_id) { + Ok(package) => { + log::trace!("reading aqua package for {package_id} from custom registry"); + return Ok(package); + } + Err(AquaRegistryError::PackageNotFound(_)) => {} + Err(err) => return Err(err), + }, + Ok(None) => {} + Err(err) => return Err(err), } - if self.config.use_baked_registry + if self.use_baked_registry && let Some(package) = super::standard_registry::package(package_id) { log::trace!("reading baked-in aqua package for {package_id}"); @@ -154,21 +117,267 @@ impl RegistryFetcher for MiseRegistryFetcher { "no aqua-registry found for {package_id}" ))) } + + async fn registry(&self) -> aqua_registry::Result>> { + let registry = self + .registry + .get_or_init(|| async { self.load_registry().await.map_err(|err| err.to_string()) }) + .await; + registry + .clone() + .map_err(AquaRegistryError::RegistryNotAvailable) + } + + async fn load_registry(&self) -> aqua_registry::Result>> { + let Some(registry_url) = self.registry_url.as_deref() else { + return Ok(None); + }; + + let source = self.registry_source(registry_url).await?; + let source_hash = RegistryCache::source_hash(&source); + + if let Some(registry) = self + .load_compiled_registry(registry_url, &source_hash) + .await + { + spawn_stale_compiled_prune( + self.cache.clone(), + registry_url.to_string(), + source_hash.clone(), + ); + return Ok(Some(Arc::new(ActiveRegistry::Compiled(registry)))); + } + + let registry = parse_registry_source(registry_url.to_string(), source).await?; + spawn_compiled_registry_cache_writer( + registry_url.to_string(), + self.cache.clone(), + source_hash, + Arc::clone(®istry), + ); + Ok(Some(Arc::new(ActiveRegistry::Parsed(registry)))) + } + + async fn load_compiled_registry( + &self, + registry_url: &str, + source_hash: &str, + ) -> Option { + let cache = self.cache.clone(); + let registry_url = registry_url.to_string(); + let cache_registry_url = registry_url.clone(); + let cache_source_hash = source_hash.to_string(); + match tokio::task::spawn_blocking(move || { + cache.load_compiled(&cache_registry_url, &cache_source_hash) + }) + .await + { + Ok(Ok(registry)) => Some(registry), + Ok(Err(err)) => { + log::debug!("compiled aqua registry cache miss for {registry_url}: {err}"); + None + } + Err(err) => { + warn!("failed to load compiled aqua registry cache for {registry_url}: {err}"); + None + } + } + } + + async fn registry_source(&self, registry_url: &str) -> aqua_registry::Result { + if Url::parse(registry_url).is_ok_and(|url| url.scheme() == "file") { + return download_registry_source(registry_url).await; + } + + if let Some(source) = self + .cache + .read_fresh_source(registry_url, self.source_cache_ttl)? + { + return Ok(source); + } + + if self.prefer_offline { + trace!("using cached aqua registry source due to prefer-offline mode"); + return self + .cache + .read_source(registry_url) + .map_err(|err| { + AquaRegistryError::RegistryNotAvailable(format!( + "failed to read cached aqua registry source {} while prefer-offline mode is enabled: {err}", + self.cache.source_path(registry_url).display() + )) + })? + .ok_or_else(|| { + AquaRegistryError::RegistryNotAvailable(format!( + "failed to read cached aqua registry source {} while prefer-offline mode is enabled: cache file does not exist", + self.cache.source_path(registry_url).display() + )) + }); + } + + let source = download_registry_source(registry_url).await?; + self.cache.write_source(registry_url, &source)?; + Ok(source) + } } -fn fetch_latest_repo(repo: &Git) -> Result<()> { - if file::modified_duration(&repo.dir)? < WEEKLY { - return Ok(()); +async fn download_registry_source(registry_url: &str) -> aqua_registry::Result { + let mut errors = Vec::new(); + let github_repo = github_repo_slug(registry_url); + + for file_name in ["registry.yaml", "registry.yml"] { + let source = if let Some((owner, repo)) = github_repo.as_ref() { + let url = format!("https://api.github.com/repos/{owner}/{repo}/contents/{file_name}"); + let mut headers = HeaderMap::new(); + headers.insert( + ACCEPT, + HeaderValue::from_static("application/vnd.github.raw"), + ); + HTTP.get_text_request(url.as_str()) + .headers(&headers) + .send() + .await + .map_err(|err| { + AquaRegistryError::RegistryNotAvailable(format!( + "failed to download aqua registry source {url}: {err}" + )) + }) + } else { + match registry_file_url(registry_url, file_name) { + Ok(url) => download_registry_url(url.as_str()).await, + Err(err) => Err(err), + } + }; + + match source { + Ok(source) => return Ok(source), + Err(err) => errors.push(err.to_string()), + } } - if Settings::get().prefer_offline() { - trace!("skipping aqua registry update due to prefer-offline mode"); - return Ok(()); + if github_repo.is_none() { + match download_registry_url(registry_url).await { + Ok(source) => return Ok(source), + Err(err) => errors.push(err.to_string()), + } } - info!("updating aqua registry repo"); - repo.update(None)?; - Ok(()) + Err(AquaRegistryError::RegistryNotAvailable(format!( + "failed to download aqua registry from {registry_url}: {}", + errors.join("; ") + ))) +} + +async fn download_registry_url(url: &str) -> aqua_registry::Result { + let parsed = Url::parse(url).map_err(|err| { + AquaRegistryError::RegistryNotAvailable(format!("invalid aqua registry URL {url}: {err}")) + })?; + + if parsed.scheme() == "file" { + let path = parsed.to_file_path().map_err(|_| { + AquaRegistryError::RegistryNotAvailable(format!("invalid aqua registry URL {url}")) + })?; + let path_display = path.display().to_string(); + return tokio::task::spawn_blocking(move || { + std::fs::read_to_string(&path).map_err(|err| { + AquaRegistryError::RegistryNotAvailable(format!( + "failed to read aqua registry source {path_display}: {err}" + )) + }) + }) + .await + .map_err(|err| { + AquaRegistryError::RegistryNotAvailable(format!( + "failed to read aqua registry source on blocking worker: {err}" + )) + })?; + } + + HTTP.get_text(parsed).await.map_err(|err| { + AquaRegistryError::RegistryNotAvailable(format!( + "failed to download aqua registry source {url}: {err}" + )) + }) +} + +fn registry_file_url(registry_url: &str, file_name: &str) -> aqua_registry::Result { + let mut url = Url::parse(registry_url).map_err(|err| { + AquaRegistryError::RegistryNotAvailable(format!( + "invalid aqua registry URL {registry_url}: {err}" + )) + })?; + let path = url.path().trim_end_matches('/'); + url.set_path(&format!("{path}/{file_name}")); + url.set_query(None); + url.set_fragment(None); + Ok(url) +} + +async fn parse_registry_source( + registry_url: String, + source: String, +) -> aqua_registry::Result> { + tokio::task::spawn_blocking(move || { + info!("parsing aqua registry from {registry_url}"); + measure!("aqua_registry::parse_yaml", { + ParsedRegistry::parse_yaml(&source).map(Arc::new) + }) + }) + .await + .map_err(|err| { + AquaRegistryError::RegistryNotAvailable(format!( + "failed to parse aqua registry on blocking worker: {err}" + )) + })? +} + +fn spawn_stale_compiled_prune(cache: RegistryCache, registry_url: String, source_hash: String) { + tokio::task::spawn_blocking(move || { + cache.prune_stale_compiled(®istry_url, &source_hash); + }); +} + +fn spawn_compiled_registry_cache_writer( + registry_url: String, + cache: RegistryCache, + source_hash: String, + registry: Arc, +) { + tokio::task::spawn_blocking(move || { + if cache.load_compiled(®istry_url, &source_hash).is_ok() { + cache.prune_stale_compiled(®istry_url, &source_hash); + return; + } + + info!("writing compiled aqua registry cache for {registry_url}"); + if let Err(err) = measure!("aqua_registry::write_compiled_cache", { + cache + .write_compiled(®istry_url, &source_hash, registry.as_ref()) + .map(|_| ()) + }) { + warn!("failed to write compiled aqua registry cache for {registry_url}: {err}"); + } + }); +} + +fn github_repo_slug(registry_url: &str) -> Option<(String, String)> { + let url = Url::parse(registry_url).ok()?; + if url.scheme() != "https" + || url.host_str()? != "github.com" + || url.query().is_some() + || url.fragment().is_some() + { + return None; + } + + let mut segments = url.path_segments()?; + let owner = segments.next()?; + let repo = segments.next()?.trim_end_matches(".git"); + if owner.is_empty() || repo.is_empty() || segments.any(|segment| !segment.is_empty()) { + return None; + } + + Some((owner.to_string(), repo.to_string())) } struct AquaSuggestionsCache { @@ -219,43 +428,262 @@ pub use aqua_registry::{ #[cfg(test)] mod tests { use super::*; - use std::path::PathBuf; - - fn test_fetcher(cache_dir: PathBuf, use_baked_registry: bool) -> MiseRegistryFetcher { - MiseRegistryFetcher { - config: AquaRegistryConfig { - cache_dir, - registry_url: Some("https://example.com/custom-aqua-registry".to_string()), - use_baked_registry, - prefer_offline: false, - }, - } + use std::path::{Path, PathBuf}; + + #[test] + fn github_slug_only_handles_https_repo_urls() { + assert_eq!( + github_repo_slug("https://github.com/aquaproj/aqua-registry"), + Some(("aquaproj".to_string(), "aqua-registry".to_string())) + ); + assert_eq!( + github_repo_slug("https://github.com/aquaproj/aqua-registry.git"), + Some(("aquaproj".to_string(), "aqua-registry".to_string())) + ); + assert_eq!( + github_repo_slug("https://github.com/aquaproj/aqua-registry/"), + Some(("aquaproj".to_string(), "aqua-registry".to_string())) + ); + assert_eq!( + github_repo_slug("http://github.com/aqua/aqua-registry"), + None + ); + assert_eq!( + github_repo_slug("https://api.github.com/repos/aquaproj/aqua-registry"), + None + ); + assert_eq!( + github_repo_slug("git@github.com:aquaproj/aqua-registry.git"), + None + ); + assert_eq!( + github_repo_slug("https://github.com/aquaproj/aqua-registry?ref=main"), + None + ); + } + + #[test] + fn registry_file_url_appends_registry_file_name() { + assert_eq!( + registry_file_url("https://example.com/aqua-registry/", "registry.yml") + .unwrap() + .as_str(), + "https://example.com/aqua-registry/registry.yml" + ); + assert_eq!( + registry_file_url( + "https://example.com/aqua-registry?ref=main", + "registry.yaml" + ) + .unwrap() + .as_str(), + "https://example.com/aqua-registry/registry.yaml" + ); + } + + #[test] + fn compiled_registry_cache_is_scoped_by_registry_url() { + let cache = RegistryCache::new("/cache"); + let first = cache.compiled_dir("https://example.com/one", "source"); + let second = cache.compiled_dir("https://example.com/two", "source"); + + assert_ne!(first.parent(), second.parent()); + assert_eq!( + first.file_name().and_then(|name| name.to_str()), + Some("source") + ); } #[tokio::test] - async fn test_custom_registry_falls_back_to_baked_registry_when_enabled() { + async fn custom_registry_load_failure_does_not_fall_back_to_baked_registry() { let temp = tempfile::tempdir().unwrap(); - std::fs::create_dir(temp.path().join(".git")).unwrap(); + let missing_registry = temp.path().join("missing-registry"); + let err = test_registry( + temp.path().to_path_buf(), + Some(file_registry_url(&missing_registry)), + true, + ) + .fetch_package("01mf02/jaq") + .await + .unwrap_err(); - let package = test_fetcher(temp.path().to_path_buf(), true) - .fetch_package("01mf02/jaq") - .await - .unwrap(); + assert!(matches!(err, AquaRegistryError::RegistryNotAvailable(_))); + } + + #[tokio::test] + async fn baked_registry_fallback_handles_custom_registry_package_miss() { + let temp = tempfile::tempdir().unwrap(); + let registry_dir = temp.path().join("custom-registry"); + std::fs::create_dir(®istry_dir).unwrap(); + std::fs::write( + registry_dir.join("registry.yml"), + "packages:\n - name: example/custom\n repo_owner: example\n repo_name: custom\n", + ) + .unwrap(); + + let package = test_registry( + temp.path().to_path_buf(), + Some(file_registry_url(®istry_dir)), + true, + ) + .fetch_package("01mf02/jaq") + .await + .unwrap(); + + assert_eq!(package.repo_owner, "01mf02"); + assert_eq!(package.repo_name, "jaq"); + } + + #[tokio::test] + async fn custom_registry_does_not_fall_back_when_baked_registry_disabled() { + let temp = tempfile::tempdir().unwrap(); + let missing_registry = temp.path().join("missing-registry"); + + let err = test_registry( + temp.path().to_path_buf(), + Some(file_registry_url(&missing_registry)), + false, + ) + .fetch_package("01mf02/jaq") + .await + .unwrap_err(); + + assert!(matches!(err, AquaRegistryError::RegistryNotAvailable(_))); + } + + #[tokio::test] + async fn parses_bundled_registry_from_local_source() { + let temp = tempfile::tempdir().unwrap(); + let registry_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("vendor/aqua-registry"); + let fetcher = test_registry( + temp.path().to_path_buf(), + Some(file_registry_url(®istry_dir)), + false, + ); + + let registry = fetcher.load_registry().await.unwrap().unwrap(); + let package = registry.package("01mf02/jaq").unwrap(); assert_eq!(package.repo_owner, "01mf02"); assert_eq!(package.repo_name, "jaq"); } #[tokio::test] - async fn test_custom_registry_does_not_fall_back_when_baked_registry_disabled() { + async fn same_source_hash_uses_existing_compiled_cache() { + let temp = tempfile::tempdir().unwrap(); + let registry_dir = temp.path().join("custom-registry"); + std::fs::create_dir(®istry_dir).unwrap(); + let source = "packages:\n - name: example/custom\n url: https://example.com/custom\n"; + std::fs::write(registry_dir.join("registry.yml"), source).unwrap(); + let registry_url = file_registry_url(®istry_dir); + let source_hash = RegistryCache::source_hash(source); + let cache = RegistryCache::new(temp.path()); + let parsed = ParsedRegistry::parse_yaml(source).unwrap(); + cache + .write_compiled(®istry_url, &source_hash, &parsed) + .unwrap(); + + let registry = test_registry(temp.path().to_path_buf(), Some(registry_url), false) + .load_registry() + .await + .unwrap() + .unwrap(); + + assert!(matches!(registry.as_ref(), ActiveRegistry::Compiled(_))); + } + + #[tokio::test] + async fn local_registry_source_bypasses_download_cache() { + let temp = tempfile::tempdir().unwrap(); + let registry_dir = temp.path().join("custom-registry"); + std::fs::create_dir(®istry_dir).unwrap(); + let registry_path = registry_dir.join("registry.yaml"); + std::fs::write( + ®istry_path, + "packages:\n - name: example/first\n url: https://example.com/first\n", + ) + .unwrap(); + + let fetcher = test_registry( + temp.path().join("cache"), + Some(format!("file://{}", registry_dir.display())), + false, + ); + let first = fetcher + .registry_source(fetcher.registry_url.as_deref().unwrap()) + .await + .unwrap(); + + std::fs::write( + registry_path, + "packages:\n - name: example/second\n url: https://example.com/second\n", + ) + .unwrap(); + let second = fetcher + .registry_source(fetcher.registry_url.as_deref().unwrap()) + .await + .unwrap(); + + assert!(first.contains("example/first")); + assert!(second.contains("example/second")); + } + + #[tokio::test] + async fn direct_file_registry_source_is_allowed() { let temp = tempfile::tempdir().unwrap(); - std::fs::create_dir(temp.path().join(".git")).unwrap(); + let registry_path = temp.path().join("registry.yaml"); + std::fs::write( + ®istry_path, + "packages:\n - name: example/direct\n url: https://example.com/direct\n", + ) + .unwrap(); + + let fetcher = test_registry( + temp.path().join("cache"), + Some(file_registry_url(®istry_path)), + false, + ); + let source = fetcher + .registry_source(fetcher.registry_url.as_deref().unwrap()) + .await + .unwrap(); - let err = test_fetcher(temp.path().to_path_buf(), false) - .fetch_package("01mf02/jaq") + assert!(source.contains("example/direct")); + } + + #[tokio::test] + async fn prefer_offline_missing_source_has_clear_error() { + let temp = tempfile::tempdir().unwrap(); + let mut fetcher = test_registry( + temp.path().to_path_buf(), + Some("https://example.com/aqua-registry".to_string()), + false, + ); + fetcher.prefer_offline = true; + + let err = fetcher + .registry_source(fetcher.registry_url.as_deref().unwrap()) .await .unwrap_err(); - assert!(matches!(err, AquaRegistryError::RegistryNotAvailable(_))); + assert!(err.to_string().contains("prefer-offline mode is enabled")); + } + + fn test_registry( + cache_dir: PathBuf, + registry_url: Option, + use_baked_registry: bool, + ) -> AquaRegistry { + AquaRegistry::new( + cache_dir, + registry_url, + use_baked_registry, + false, + DEFAULT_AQUA_REGISTRY_CACHE_TTL, + ) + } + + fn file_registry_url(path: &Path) -> String { + format!("file://{}", path.display()) } } diff --git a/src/config/settings.rs b/src/config/settings.rs index f12353317a..5bb9d7deca 100644 --- a/src/config/settings.rs +++ b/src/config/settings.rs @@ -677,6 +677,16 @@ impl Settings { duration::parse_duration(&self.env_cache_ttl).unwrap() } + pub fn aqua_registry_cache_ttl(&self) -> Duration { + self.aqua + .registry_cache_ttl + .as_deref() + .map(duration::parse_duration) + .transpose() + .unwrap() + .unwrap_or(crate::aqua::aqua_registry_wrapper::DEFAULT_AQUA_REGISTRY_CACHE_TTL) + } + pub fn task_timeout_duration(&self) -> Option { self.task .timeout