From 320467932152d38eb0026ca813d87bfbef13269d Mon Sep 17 00:00:00 2001 From: jdx <216188+jdx@users.noreply.github.com> Date: Sat, 31 Jan 2026 13:39:09 -0600 Subject: [PATCH 1/4] feat(task): improve source freshness checking with edge case handling - Add epoch timestamp detection: files with mtime == UNIX_EPOCH (e.g., from tarball extraction) are treated as stale - Include file size in metadata hash to detect changes when mtimes are unreliable - Add warning when sources are defined but no matching files found - Add warning when task completes but expected outputs don't exist - Add optional content hashing (blake3) via task.source_freshness_hash_contents - Add optional equal mtime comparison via task.source_freshness_equal_mtime_is_fresh Co-Authored-By: Claude Opus 4.5 --- e2e/tasks/test_task_source_freshness | 105 +++++++++++++++++++++++ schema/mise.json | 10 +++ settings.toml | 12 +++ src/task/task_executor.rs | 2 +- src/task/task_source_checker.rs | 122 +++++++++++++++++++++++---- 5 files changed, 232 insertions(+), 19 deletions(-) create mode 100644 e2e/tasks/test_task_source_freshness diff --git a/e2e/tasks/test_task_source_freshness b/e2e/tasks/test_task_source_freshness new file mode 100644 index 0000000000..084d6fa2c1 --- /dev/null +++ b/e2e/tasks/test_task_source_freshness @@ -0,0 +1,105 @@ +#!/usr/bin/env bash + +# Test warning for missing outputs after task execution +cat <<EOF >mise.toml +[tasks.nooutput] +run = 'echo ran but no output created' +sources = ['input.txt'] +outputs = ['missing_output.txt'] +EOF + +echo "source" >input.txt +# Should warn about missing output +assert_contains "mise run nooutput 2>&1" "did not generate expected output" + +# Test size-based detection (file size changes should trigger rebuild) +cat <<EOF >mise.toml +[tasks.sizetest] +run = 'echo rebuilt' +sources = ['sized.txt'] +outputs = ['sizeout.txt'] +EOF + +echo "small" >sized.txt +echo "out" >sizeout.txt +sleep 0.1 +touch sizeout.txt + +# First run should skip (output is newer) 
+assert_empty "mise run -q sizetest" + +# Change file content/size - this changes the metadata hash +echo "much larger content now" >sized.txt +# Copy mtime from output to source (so mtime comparison alone would say fresh) +touch -r sizeout.txt sized.txt +# Should rebuild because size changed in the metadata hash +assert "mise run -q sizetest" "rebuilt" + +# Test that normal source/output freshness still works +cat <<EOF >mise.toml +[tasks.fresh] +run = 'echo built' +sources = ['src.txt'] +outputs = ['out.txt'] +EOF + +echo "source" >src.txt +sleep 0.1 +echo "output" >out.txt + +# Output is newer, should skip +assert_empty "mise run -q fresh" + +# Touch source to make it newer +sleep 0.1 +touch src.txt + +# Source is newer, should rebuild +assert "mise run -q fresh" "built" + +# Test rename detection (path is part of the hash) +cat <<EOF >mise.toml +[tasks.rename] +run = 'echo rebuilt' +sources = ['*.src'] +outputs = ['rename.out'] +EOF + +echo "content" >original.src +echo "out" >rename.out +sleep 0.1 +touch rename.out + +# First run should skip (output is newer) +assert_empty "mise run -q rename" + +# Rename the source file (same content, same size, but different path) +mv original.src renamed.src +# Copy mtime from output so only the path change triggers rebuild +touch -r rename.out renamed.src +# Should rebuild because path changed in the metadata hash +assert "mise run -q rename" "rebuilt" + +# Test glob output patterns - should not warn when glob matches files +cat <<EOF >mise.toml +[tasks.globout] +run = 'touch output1.gen output2.gen' +sources = ['input.txt'] +outputs = ['*.gen'] +EOF + +echo "source" >input.txt +# Should run and NOT warn (glob matches created files) +assert_not_contains "mise run globout 2>&1" "did not generate expected output" + +# Test glob output patterns - should warn when glob matches no files +cat <<EOF >mise.toml +[tasks.noglobout] +run = 'echo no files created' +sources = ['input.txt'] +outputs = ['*.nomatch'] +EOF + +echo "source" >input.txt +# Should 
warn about missing glob output +assert_contains "mise run noglobout 2>&1" "did not generate expected output" diff --git a/schema/mise.json b/schema/mise.json index 3a41766548..64d676bcfa 100644 --- a/schema/mise.json +++ b/schema/mise.json @@ -1424,6 +1424,16 @@ "default": true, "description": "Whether to respect .gitignore files when discovering monorepo subdirectories.", "type": "boolean" + }, + "source_freshness_equal_mtime_is_fresh": { + "default": false, + "description": "When source mtime equals output mtime, consider sources fresh (use <=). Default false uses strict < comparison.", + "type": "boolean" + }, + "source_freshness_hash_contents": { + "default": false, + "description": "Use content hashing (blake3) instead of metadata for source freshness. More accurate but slower.", + "type": "boolean" } } } diff --git a/settings.toml b/settings.toml index d6264d1e73..ede2e918d4 100644 --- a/settings.toml +++ b/settings.toml @@ -1871,3 +1871,15 @@ when discovering tasks in a monorepo. """ env = "MISE_TASK_MONOREPO_RESPECT_GITIGNORE" type = "Bool" + +[task.source_freshness_equal_mtime_is_fresh] +default = false +description = "When source mtime equals output mtime, consider sources fresh (use <=). Default false uses strict < comparison." +env = "MISE_TASK_SOURCE_FRESHNESS_EQUAL_MTIME_IS_FRESH" +type = "Bool" + +[task.source_freshness_hash_contents] +default = false +description = "Use content hashing (blake3) instead of metadata for source freshness. More accurate but slower." 
+env = "MISE_TASK_SOURCE_FRESHNESS_HASH_CONTENTS" +type = "Bool" diff --git a/src/task/task_executor.rs b/src/task/task_executor.rs index 9a6c8ab8eb..b08e24711d 100644 --- a/src/task/task_executor.rs +++ b/src/task/task_executor.rs @@ -265,7 +265,7 @@ impl TaskExecutor { ); } - save_checksum(task)?; + save_checksum(task, config).await?; Ok(()) } diff --git a/src/task/task_source_checker.rs b/src/task/task_source_checker.rs index c4f2d41617..ded8403c44 100644 --- a/src/task/task_source_checker.rs +++ b/src/task/task_source_checker.rs @@ -1,4 +1,4 @@ -use crate::config::Config; +use crate::config::{Config, Settings}; use crate::dirs; use crate::file::{self, display_path}; use crate::hash; @@ -11,7 +11,7 @@ use std::fs; use std::hash::{DefaultHasher, Hash, Hasher}; use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::time::SystemTime; +use std::time::{SystemTime, UNIX_EPOCH}; /// Check if a path is a glob pattern pub fn is_glob_pattern(path: &str) -> bool { @@ -102,6 +102,10 @@ pub async fn sources_are_fresh(task: &Task, config: &Arc) -> Result Result { @@ -109,25 +113,64 @@ pub async fn sources_are_fresh(task: &Task, config: &Arc) -> Result Ok(sources < outputs), + (Some(sources), Some(outputs)) => { + if equal_mtime_is_fresh { + Ok(sources <= outputs) + } else { + Ok(sources < outputs) + } + } _ => Ok(false), } }; @@ -138,7 +181,7 @@ pub async fn sources_are_fresh(task: &Task, config: &Arc) -> Result Result<()> { +pub async fn save_checksum(task: &Task, config: &Arc) -> Result<()> { if task.sources.is_empty() { return Ok(()); } @@ -147,22 +190,53 @@ pub fn save_checksum(task: &Task) -> Result<()> { debug!("touching auto output file: {p}"); file::touch_file(&PathBuf::from(&p))?; } + } else { + // Check if explicitly defined outputs were generated + // Use task_cwd to respect the task's dir setting, matching sources_are_fresh behavior + let root = task_cwd(task, config).await?; + for output in task.outputs.paths(task) { + let output_exists = if 
is_glob_pattern(&output) { + // For glob patterns, check if any files match + let pattern = root.join(&output); + glob(pattern.to_str().unwrap_or_default()) + .map(|paths| paths.flatten().next().is_some()) + .unwrap_or(false) + } else { + // For regular paths, check if file exists + let path = Path::new(&output); + let full_path = if path.is_relative() { + root.join(path) + } else { + path.to_path_buf() + }; + full_path.exists() + }; + if !output_exists { + warn!( + "task {} did not generate expected output: {}", + task.name, output + ); + } + } } Ok(()) } /// Get the path to store source hashes for a task -fn sources_hash_path(task: &Task) -> PathBuf { +fn sources_hash_path(task: &Task, content_hash: bool) -> PathBuf { let mut hasher = DefaultHasher::new(); task.hash(&mut hasher); task.config_source.hash(&mut hasher); let hash = format!("{:x}", hasher.finish()); - dirs::STATE.join("task-sources").join(&hash) + let suffix = if content_hash { "-content" } else { "" }; + dirs::STATE + .join("task-sources") + .join(format!("{hash}{suffix}")) } /// Get the existing source hash for a task, if it exists -fn source_metadata_existing_hash(task: &Task) -> Option { - let path = sources_hash_path(task); +fn source_existing_hash(task: &Task, content_hash: bool) -> Option { + let path = sources_hash_path(task, content_hash); if path.exists() { Some(file::read_to_string(&path).unwrap_or_default()) } else { @@ -207,9 +281,21 @@ fn get_file_metadatas( } /// Convert file metadata to a hash string for comparison +/// Includes path and file size to detect changes even when mtimes are unreliable fn file_metadatas_to_hash(metadatas: &[(PathBuf, fs::Metadata)]) -> String { - let paths: Vec<_> = metadatas.iter().map(|(p, _)| p).collect(); - hash::hash_to_str(&paths) + let path_and_sizes: Vec<_> = metadatas.iter().map(|(p, m)| (p, m.len())).collect(); + hash::hash_to_str(&path_and_sizes) +} + +/// Convert file contents to a hash string for comparison using blake3 +/// More accurate than 
metadata hashing but slower since it reads all file contents +fn file_contents_to_hash(metadatas: &[(PathBuf, fs::Metadata)]) -> Result { + let mut content_hashes: Vec<(&PathBuf, String)> = Vec::new(); + for (path, _) in metadatas { + let file_hash = hash::file_hash_blake3(path, None)?; + content_hashes.push((path, file_hash)); + } + Ok(hash::hash_to_str(&content_hashes)) } /// Get the last modified time from file metadata From 30a72298f081f8856469308552ce702127c4f0ad Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Sat, 31 Jan 2026 20:43:39 +0000 Subject: [PATCH 2/4] [autofix.ci] apply automated fixes --- src/task/task_source_checker.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/task/task_source_checker.rs b/src/task/task_source_checker.rs index ded8403c44..354717d8b7 100644 --- a/src/task/task_source_checker.rs +++ b/src/task/task_source_checker.rs @@ -126,15 +126,14 @@ pub async fn sources_are_fresh(task: &Task, config: &Arc) -> Result Date: Sat, 31 Jan 2026 20:47:28 +0000 Subject: [PATCH 3/4] [autofix.ci] apply automated fixes (attempt 2/3) --- src/task/task_source_checker.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/task/task_source_checker.rs b/src/task/task_source_checker.rs index 354717d8b7..e6cd104106 100644 --- a/src/task/task_source_checker.rs +++ b/src/task/task_source_checker.rs @@ -127,13 +127,14 @@ pub async fn sources_are_fresh(task: &Task, config: &Arc) -> Result Date: Sat, 31 Jan 2026 15:00:37 -0600 Subject: [PATCH 4/4] fix: use portable mtime approach in source freshness tests The `touch -r` approach with equal mtimes didn't properly isolate the hash-based detection since equal mtimes also trigger rebuilds. Changed to `sleep + touch output` to ensure output is newer than source, so only the hash check (size/path change) triggers rebuild. 
Co-Authored-By: Claude Opus 4.5 --- e2e/tasks/test_task_source_freshness | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/e2e/tasks/test_task_source_freshness b/e2e/tasks/test_task_source_freshness index 084d6fa2c1..41c19a77c4 100644 --- a/e2e/tasks/test_task_source_freshness +++ b/e2e/tasks/test_task_source_freshness @@ -30,8 +30,9 @@ assert_empty "mise run -q sizetest" # Change file content/size - this changes the metadata hash echo "much larger content now" >sized.txt -# Copy mtime from output to source (so mtime comparison alone would say fresh) -touch -r sizeout.txt sized.txt +# Make output NEWER than source (so mtime comparison alone would say fresh) +sleep 0.1 +touch sizeout.txt # Should rebuild because size changed in the metadata hash assert "mise run -q sizetest" "rebuilt" @@ -75,8 +76,9 @@ assert_empty "mise run -q rename" # Rename the source file (same content, same size, but different path) mv original.src renamed.src -# Copy mtime from output so only the path change triggers rebuild -touch -r rename.out renamed.src +# Make output NEWER than source so only the path change triggers rebuild +sleep 0.1 +touch rename.out # Should rebuild because path changed in the metadata hash assert "mise run -q rename" "rebuilt"