diff --git a/e2e/cli/test_prepare b/e2e/cli/test_prepare index 1fead15608..b3e551e430 100644 --- a/e2e/cli/test_prepare +++ b/e2e/cli/test_prepare @@ -242,7 +242,7 @@ assert_contains "mise prepare --list" "composer" assert_contains "mise prepare --list" "composer.lock" assert_contains "mise prepare --dry-run" "composer" -# Test freshness detection with output directories +# Test freshness detection with content hashing rm -f composer.lock composer.json cat >mise.toml <<'EOF' @@ -256,28 +256,25 @@ cat >requirements.txt <<'EOF' requests==2.31.0 EOF -# Test 1: No output directory exists → should be stale with reason -rm -rf .venv +# Test 1: No output directory exists → should be stale (outputs missing) +rm -rf .venv .mise assert_contains "mise prepare --dry-run" "custom_venv" -assert_contains "mise prepare --dry-run" "does not exist" +assert_contains "mise prepare --dry-run" "outputs missing" -# Test 2: Output exists and is newer than sources → should be fresh -# Use explicit timestamps: source=2023, output=2024 +# Test 2: Run prepare to establish state, then check freshness mkdir -p .venv -touch -t 202301010000 requirements.txt # Jan 1, 2023 -touch -t 202401010000 .venv/pyvenv.cfg # Jan 1, 2024 (newer) -touch -t 202401010000 .venv # Set directory mtime too (mkdir sets it to NOW) -# In dry-run mode, fresh providers produce no output, so check provider name is absent +assert_contains "mise prepare --only custom_venv" "Prepared: custom_venv" + +# Second run: sources unchanged, should be fresh (content hashes match) assert_not_contains "mise prepare --dry-run" "custom_venv" -# Test 3: Sources updated after output → should be stale with reason -# source=2025 (newer than output=2024) -touch -t 202501010000 requirements.txt # Jan 1, 2025 +# Test 3: Change source content → should be stale +echo "flask==3.0.0" >>requirements.txt assert_contains "mise prepare --dry-run" "custom_venv" -assert_contains "mise prepare --dry-run" "is newer than outputs" +assert_contains "mise prepare --dry-run" "requirements.txt changed" # Clean up -rm -rf .venv requirements.txt +rm -rf .venv requirements.txt .mise rm -f composer.lock composer.json package.json mise.toml schema.graphql # Test --explain flag @@ -301,10 +298,9 @@ assert_contains "cat .explain_out" "Outputs:" assert_contains "cat .explain_out" "Command: echo explain_ran" assert_contains "cat .explain_out" "stale" -# Test --explain with fresh output +# Test --explain with fresh output (run prepare first to establish state) mkdir -p out_dir -touch -t 202301010000 src_input.txt -touch -t 202401010000 out_dir +mise prepare --only explain_test >/dev/null 2>&1 || true mise prepare explain_test --explain >.explain_out 2>&1 assert_contains "cat .explain_out" "Provider: explain_test" assert_contains "cat .explain_out" "fresh" @@ -315,47 +311,31 @@ assert_contains "cat .explain_out" "not found" rm -rf src_input.txt out_dir mise.toml .explain_out -# Test touch_outputs: after a successful run, output mtime should be updated +# Test content-hash freshness: after a successful run, state is saved and provider is fresh cat >mise.toml <<'EOF' -[prepare.touch_test] +[prepare.hash_test] sources = ["src_file.txt"] outputs = ["out_dir"] -run = "echo TOUCH_RAN" +run = "echo HASH_RAN" EOF -touch src_file.txt +echo "original content" >src_file.txt mkdir -p out_dir -# Make source newer than output so provider is stale -touch -t 202501010000 out_dir -touch -t 202601010000 src_file.txt - -# First run: should execute (stale) -assert_contains "mise prepare --only touch_test" "Prepared: touch_test" - -# Second run: outputs should now be fresh (mtime was touched after first run) -assert_contains "mise prepare --only touch_test" "up to date" -# Test touch_outputs = false: output mtime should NOT be updated -cat >mise.toml <<'EOF' -[prepare.notouch_test] -sources = ["src_file.txt"] -outputs = ["out_dir"] -run = "echo NOTOUCH_RAN" -touch_outputs = false -EOF +# First run: should execute (no previous state) +assert_contains "mise prepare --only hash_test" "Prepared: hash_test" -# Make source newer than output so provider is stale -touch -t 202501010000 out_dir -touch -t 202601010000 src_file.txt +# Second run: sources unchanged, should be fresh (hashes saved in state) +assert_contains "mise prepare --only hash_test" "up to date" -# First run: should execute (stale) -assert_contains "mise prepare --only notouch_test" "Prepared: notouch_test" +# Modify source content → should be stale again +echo "modified content" >src_file.txt +assert_contains "mise prepare --only hash_test" "Prepared: hash_test" -# Second run: should still be stale since we didn't touch outputs -# (the "echo" command doesn't modify out_dir, and we disabled mtime update) -assert_contains "mise prepare --only notouch_test" "Prepared: notouch_test" +# Third run after re-prepare: fresh again +assert_contains "mise prepare --only hash_test" "up to date" -rm -rf src_file.txt out_dir +rm -rf src_file.txt out_dir .mise # Test timeout: a command that exceeds its timeout should fail cat >mise.toml <<'EOF' @@ -409,7 +389,7 @@ assert_not_contains "mise prepare --list" "custom_sub" ) # Clean up previous test -rm -rf subproject root_input.txt root_output.txt mise.toml +rm -rf subproject root_input.txt root_output.txt mise.toml .mise # Test that monorepo subdirectory prepare steps run when using //subdir:task from root export MISE_EXPERIMENTAL=1 @@ -454,7 +434,7 @@ rm -f subapp/deps_marker assert_contains "mise run //:check" "SUBAPP_PREPARE_OK" # Clean up -rm -rf subapp mise.toml +rm -rf subapp mise.toml .mise # Test git-submodule provider cat >.gitmodules <<'EOF' @@ -483,4 +463,4 @@ EOF assert_not_contains "mise prepare --list" "git-submodule" # Clean up -rm -f .gitmodules mise.toml +rm -f .gitmodules mise.toml .mise diff --git a/src/prepare/engine.rs b/src/prepare/engine.rs index a6c9a6dae2..bd4c0ac0af 100644 --- a/src/prepare/engine.rs +++ b/src/prepare/engine.rs @@ -1,7 +1,6 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::time::SystemTime; use eyre::Result; use filetime::FileTime; @@ -16,7 +15,6 @@ use crate::ui::multi_progress_report::MultiProgressReport; type StepOutput = (PrepareStepResult, Vec); type JobOutput = Result<(String, PrepareStepResult, Vec), (String, eyre::Report)>; -use super::PrepareProvider; use super::prepare_deps::PrepareDeps; use super::providers::{ BunPrepareProvider, BundlerPrepareProvider, ComposerPrepareProvider, CustomPrepareProvider, @@ -24,6 +22,8 @@ use super::providers::{ PnpmPrepareProvider, PoetryPrepareProvider, UvPrepareProvider, YarnPrepareProvider, }; use super::rule::BUILTIN_PROVIDERS; +use super::state::{self, PrepareState}; +use super::{FreshnessResult, PrepareProvider}; /// Options for running prepare steps #[derive(Debug, Default)] @@ -42,49 +42,12 @@ pub struct PrepareOptions { pub auto_only: bool, } -/// Result of a freshness check with human-readable reason -#[derive(Debug, Clone)] -pub enum FreshnessResult { - /// Outputs are up to date - Fresh, - /// No outputs defined — always run - NoOutputs, - /// Output was created this session (e.g., auto-created venv) - SessionStale(String), - /// Some output files/dirs don't exist yet - OutputsMissing(String), - /// Sources are newer than outputs - Stale(String), - /// No sources exist — consider fresh - NoSources, - /// Forced by user request - Forced, -} - -impl FreshnessResult { - pub fn is_fresh(&self) -> bool { - matches!(self, FreshnessResult::Fresh | FreshnessResult::NoSources) - } - - pub fn reason(&self) -> &str { - match self { - FreshnessResult::Fresh => "up to date", - FreshnessResult::NoOutputs => "no outputs defined", - FreshnessResult::SessionStale(r) => r, - FreshnessResult::OutputsMissing(r) => r, - FreshnessResult::Stale(r) => r, - FreshnessResult::NoSources => "no sources", - FreshnessResult::Forced => "forced", - } - } -} - /// Result of a prepare step #[derive(Debug)] pub enum PrepareStepResult { /// Step ran successfully Ran(String), - /// Step would have run (dry-run mode) — (id, reason) + /// Step would have run (dry-run mode), with reason why it's stale WouldRun(String, String), /// Step was skipped because outputs are fresh Fresh(String), @@ -380,15 +343,14 @@ impl PrepareEngine { }; if !freshness.is_fresh() { - let reason = freshness.reason().to_string(); let cmd = provider.prepare_command()?; let outputs = provider.outputs(); let touch = provider.touch_outputs(); let depends = provider.depends(); let timeout = provider.timeout(); + let reason = freshness.reason().to_string(); if opts.dry_run { - // Just record that it would run, let CLI handle output results.push(PrepareStepResult::WouldRun(id, reason)); } else { to_run.push(PrepareJob { @@ -431,6 +393,23 @@ impl PrepareEngine { results.push(step_result); } } + + // Save content hashes for all successfully ran providers + for step in &results { + if let PrepareStepResult::Ran(id) = step + && let Some(provider) = self.providers.iter().find(|p| p.id() == id) + { + let project_root = &provider.base().project_root; + let sources = provider.sources(); + if let Ok(hashes) = state::hash_sources(&sources, project_root) { + let mut st = PrepareState::load(project_root); + st.set_hashes(id, hashes); + if let Err(e) = st.save(project_root) { + warn!("failed to save prepare state: {e}"); + } + } + } + } } Ok(PrepareResult { steps: results }) @@ -673,9 +652,12 @@ impl PrepareEngine { Ok(results) } - /// Check if outputs are newer than sources (stateless mtime comparison) - /// Returns a FreshnessResult with a human-readable reason - fn check_freshness(&self, provider: &dyn PrepareProvider) -> Result { + /// Check if a provider's outputs are fresh relative to its sources. + /// + /// Uses blake3 content hashing with persistent state. On first run (no stored + /// hashes), the provider is always considered stale. Session-based stale + /// tracking (venv auto-creation) is always checked first. + pub fn check_freshness(&self, provider: &dyn PrepareProvider) -> Result { let sources = provider.sources(); let outputs = provider.outputs(); @@ -684,105 +666,59 @@ impl PrepareEngine { } // Check if any output was created this session (before prepare ran) - // This handles the case where venv is auto-created but packages aren't installed yet for output in &outputs { if super::is_output_stale(output) { - return Ok(FreshnessResult::SessionStale(format!( - "{} created this session", - output.display() - ))); + return Ok(FreshnessResult::Stale( + "output created this session".to_string(), + )); } } - // Check for missing outputs + // Check if any output is missing for output in &outputs { if !output.exists() { - return Ok(FreshnessResult::OutputsMissing(format!( - "{} does not exist", - output.display() - ))); + return Ok(FreshnessResult::OutputsMissing); } } - let sources_mtime = Self::last_modified(&sources)?; - let outputs_mtime = Self::last_modified(&outputs)?; - - match (sources_mtime, outputs_mtime) { - (Some(src), Some(out)) if src > out => { - // Find which source is newest to provide a helpful reason - let newest_source = sources - .iter() - .filter(|p| p.exists()) - .filter_map(|p| { - let mtime = if p.is_dir() { - Self::newest_file_in_dir(p, 3) - } else { - p.metadata().ok().and_then(|m| m.modified().ok()) - }; - mtime.map(|m| (p, m)) - }) - .max_by_key(|(_, m)| *m) - .map(|(p, _)| p.display().to_string()) - .unwrap_or_else(|| "sources".to_string()); - Ok(FreshnessResult::Stale(format!( - "{newest_source} is newer than outputs" - ))) - } - (Some(_), Some(_)) => Ok(FreshnessResult::Fresh), - (_, None) => Ok(FreshnessResult::Stale( - "could not determine modification time of outputs".to_string(), - )), - (None, _) => Ok(FreshnessResult::NoSources), + if sources.is_empty() { + return Ok(FreshnessResult::NoSources); } - } - - /// Get the most recent modification time from a list of paths - /// For directories, recursively finds the newest file within (up to 3 levels deep) - fn last_modified(paths: &[PathBuf]) -> Result> { - let mut mtimes: Vec = vec![]; - for path in paths.iter().filter(|p| p.exists()) { - if path.is_dir() { - // For directories, find the newest file within (limited depth for performance) - if let Some(mtime) = Self::newest_file_in_dir(path, 3) { - mtimes.push(mtime); + // Use content-hash comparison via persistent state + let project_root = &provider.base().project_root; + let st = PrepareState::load(project_root); + let provider_id = provider.id(); + + let current_hashes = state::hash_sources(&sources, project_root)?; + + match st.get_hashes(provider_id) { + Some(stored_hashes) => { + // Check for changed files + for (path, hash) in ¤t_hashes { + match stored_hashes.get(path.as_str()) { + Some(stored_hash) if stored_hash == hash => {} + Some(_) => { + return Ok(FreshnessResult::Stale(format!("{path} changed"))); + } + None => { + return Ok(FreshnessResult::Stale(format!("{path} added"))); + } + } } - } else if let Some(mtime) = path.metadata().ok().and_then(|m| m.modified().ok()) { - mtimes.push(mtime); - } - } - - Ok(mtimes.into_iter().max()) - } - - /// Recursively find the newest file modification time in a directory. - /// The directory's own mtime is always included so that touching the directory - /// itself (e.g. via `touch_outputs`) is reflected in freshness checks. - fn newest_file_in_dir(dir: &Path, max_depth: usize) -> Option { - // Always seed with the directory's own mtime so that touching the dir - // (without modifying its contents) is visible to freshness checks. - let mut newest = dir.metadata().ok().and_then(|m| m.modified().ok()); - - if max_depth == 0 { - return newest; - } - - if let Ok(entries) = std::fs::read_dir(dir) { - for entry in entries.flatten() { - let path = entry.path(); - let mtime = if path.is_dir() { - Self::newest_file_in_dir(&path, max_depth - 1) - } else { - path.metadata().ok().and_then(|m| m.modified().ok()) - }; - - if let Some(t) = mtime { - newest = Some(newest.map_or(t, |n| n.max(t))); + // Check for removed files + for path in stored_hashes.keys() { + if !current_hashes.contains_key(path) { + return Ok(FreshnessResult::Stale(format!("{path} removed"))); + } } + Ok(FreshnessResult::Fresh) + } + None => { + // No stored state — first run, consider stale + Ok(FreshnessResult::Stale("no previous state".to_string())) } } - - newest } /// Execute a prepare command (static version for parallel execution) diff --git a/src/prepare/mod.rs b/src/prepare/mod.rs index c141c72b1f..7c4e769853 100644 --- a/src/prepare/mod.rs +++ b/src/prepare/mod.rs @@ -15,6 +15,43 @@ mod engine; pub(crate) mod prepare_deps; pub mod providers; mod rule; +pub mod state; + +/// Result of a freshness check for a prepare provider +#[derive(Debug, Clone)] +pub enum FreshnessResult { + /// Outputs are up to date with sources + Fresh, + /// Provider has no outputs defined, always run to be safe + NoOutputs, + /// One or more output paths don't exist + OutputsMissing, + /// Sources have changed since last successful run + Stale(String), + /// Provider has no sources, consider fresh + NoSources, + /// Force flag was used + Forced, +} + +impl FreshnessResult { + /// Returns true if the provider should be considered fresh (no work needed) + pub fn is_fresh(&self) -> bool { + matches!(self, FreshnessResult::Fresh | FreshnessResult::NoSources) + } + + /// Human-readable reason string for display + pub fn reason(&self) -> &str { + match self { + FreshnessResult::Fresh => "outputs are up to date", + FreshnessResult::NoOutputs => "no outputs defined", + FreshnessResult::OutputsMissing => "outputs missing", + FreshnessResult::Stale(reason) => reason, + FreshnessResult::NoSources => "no sources to check", + FreshnessResult::Forced => "forced", + } + } +} /// A command to execute for preparation #[derive(Debug, Clone)] diff --git a/src/prepare/state.rs b/src/prepare/state.rs new file mode 100644 index 0000000000..442c2a3488 --- /dev/null +++ b/src/prepare/state.rs @@ -0,0 +1,123 @@ +use std::collections::BTreeMap; +use std::path::{Path, PathBuf}; + +use eyre::Result; + +use crate::file; +use crate::hash::file_hash_blake3; + +/// Persistent state for prepare freshness checking. +/// +/// Stores blake3 content hashes of source files keyed by provider ID. +/// Persisted to `.mise/prepare-state.toml`. +#[derive(Debug, Default, serde::Serialize, serde::Deserialize)] +pub struct PrepareState { + /// provider_id → (relative_path → blake3_hex) + #[serde(default)] + pub providers: BTreeMap>, +} + +impl PrepareState { + /// Load state from `.mise/prepare-state.toml`, returning default if not found. + pub fn load(project_root: &Path) -> Self { + let path = state_path(project_root); + if !path.exists() { + return Self::default(); + } + match file::read_to_string(&path) { + Ok(contents) => match toml::from_str(&contents) { + Ok(state) => state, + Err(e) => { + warn!("failed to parse {}: {e}", path.display()); + Self::default() + } + }, + Err(e) => { + warn!("failed to read {}: {e}", path.display()); + Self::default() + } + } + } + + /// Save state to `.mise/prepare-state.toml`. + pub fn save(&self, project_root: &Path) -> Result<()> { + let path = state_path(project_root); + file::create_dir_all(path.parent().unwrap())?; + let contents = toml::to_string_pretty(self)?; + file::write(&path, contents)?; + Ok(()) + } + + /// Get stored hashes for a provider, or None if not previously recorded. + pub fn get_hashes(&self, provider_id: &str) -> Option<&BTreeMap> { + self.providers.get(provider_id) + } + + /// Update stored hashes for a provider. + pub fn set_hashes(&mut self, provider_id: &str, hashes: BTreeMap) { + self.providers.insert(provider_id.to_string(), hashes); + } +} + +/// Compute blake3 hashes for a list of source files. +/// +/// Returns a map of relative_path → blake3_hex. Directories are skipped +/// (only regular files are hashed). Non-existent files are omitted. +pub fn hash_sources(sources: &[PathBuf], project_root: &Path) -> Result> { + let mut hashes = BTreeMap::new(); + + for source in sources { + if !source.exists() { + continue; + } + + if source.is_dir() { + // For directories, hash all files within (up to 3 levels deep) + hash_dir_files(&mut hashes, source, project_root, 3)?; + } else { + let hash = file_hash_blake3(source, None)?; + let rel = source + .strip_prefix(project_root) + .unwrap_or(source) + .to_string_lossy() + .to_string(); + hashes.insert(rel, hash); + } + } + + Ok(hashes) +} + +/// Recursively hash files in a directory up to max_depth levels. +fn hash_dir_files( + hashes: &mut BTreeMap, + dir: &Path, + project_root: &Path, + max_depth: usize, +) -> Result<()> { + if max_depth == 0 { + return Ok(()); + } + if let Ok(entries) = std::fs::read_dir(dir) { + for entry in entries.flatten() { + let path = entry.path(); + if path.is_dir() { + hash_dir_files(hashes, &path, project_root, max_depth - 1)?; + } else { + let hash = file_hash_blake3(&path, None)?; + let rel = path + .strip_prefix(project_root) + .unwrap_or(&path) + .to_string_lossy() + .to_string(); + hashes.insert(rel, hash); + } + } + } + Ok(()) +} + +/// Path to the state file for a given project root. +fn state_path(project_root: &Path) -> PathBuf { + project_root.join(".mise").join("prepare-state.toml") +}