From 5a1ba9e6ce38df1ccb90a6b8371bb1afccd252b6 Mon Sep 17 00:00:00 2001 From: jdx <216188+jdx@users.noreply.github.com> Date: Fri, 26 Sep 2025 10:22:06 +0000 Subject: [PATCH] fix(task): prevent hang when nested tasks fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a task failed within a sequence of tasks (especially nested or dependent tasks), mise would hang indefinitely instead of properly exiting with an error. This was caused by the dependency graph never becoming empty when dependent tasks were not executed due to an earlier failure. The fix modifies inject_and_wait() to periodically check for failures and clean up the dependency graph when stopping early, ensuring proper termination. Fixes: https://github.com/jdx/mise/discussions/6391 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com> --- e2e/tasks/test_task_failure_hang | 38 ++++++++++++++++++++++++++++++++ src/cli/run.rs | 38 ++++++++++++++++++++++++++++---- 2 files changed, 72 insertions(+), 4 deletions(-) create mode 100755 e2e/tasks/test_task_failure_hang diff --git a/e2e/tasks/test_task_failure_hang b/e2e/tasks/test_task_failure_hang new file mode 100755 index 0000000000..d3ed15ce0e --- /dev/null +++ b/e2e/tasks/test_task_failure_hang @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +set -euo pipefail +# https://github.com/jdx/mise/discussions/6391 + +cat <<EOF >mise.toml +[tasks.fails] +run = ''' +sleep 1; +echo "An error occurred!" +exit 1; +''' + +[tasks.deponfails] +depends = ["fails"] +run = 'echo "This will not run because the dependency fails."' + +[tasks.grouped] +run = [ + { task = "deponfails" } +] +EOF + +# Test that task failure with dependencies does not hang +timeout 5s mise run grouped 2>&1 && exit_code=0 || exit_code=$? 
+ +# Check if it was a timeout (exit code 124) +if [ "$exit_code" -eq 124 ]; then + echo "FAIL: Task hung after dependency failure (timeout reached)" + exit 1 +fi + +# The command should fail with exit code 1 +if [ "$exit_code" -ne 1 ]; then + echo "Expected exit code 1, got $exit_code" + exit 1 +fi + +echo "Test passed: task with failing dependency did not hang" diff --git a/src/cli/run.rs b/src/cli/run.rs index b33fa65b8e..cc1530d8f7 100644 --- a/src/cli/run.rs +++ b/src/cli/run.rs @@ -829,7 +829,7 @@ impl Run { let sub_deps = Arc::new(Mutex::new(sub_deps)); // Pump subgraph into scheduler and signal completion via oneshot when done - let (done_tx, done_rx) = oneshot::channel::<()>(); + let (done_tx, mut done_rx) = oneshot::channel::<()>(); let task_env_directives: Vec<EnvDirective> = task_env.iter().cloned().map(Into::into).collect(); { @@ -887,10 +887,40 @@ impl Run { }); } - // Wait for completion - done_rx.await.map_err(|e| eyre!(e))?; + // Wait for completion with a check for early stopping + loop { + // Check if we should stop early due to failure + if self.is_stopping() && !self.continue_on_error { + trace!("inject_and_wait: stopping early due to failure"); + // Clean up the dependency graph to ensure completion + let mut deps = sub_deps.lock().await; + let tasks_to_remove: Vec<Task> = deps.all().cloned().collect(); + for task in tasks_to_remove { + deps.remove(&task); + } + drop(deps); + // Give a short time for the spawned task to finish cleanly + let _ = tokio::time::timeout(Duration::from_millis(100), done_rx).await; + return Err(eyre!("task sequence aborted due to failure")); + } + + // Try to receive the done signal with a short timeout + match tokio::time::timeout(Duration::from_millis(100), &mut done_rx).await { + Ok(Ok(())) => { + trace!("inject_and_wait: received done signal"); + break; + } + Ok(Err(e)) => { + return Err(eyre!(e)); + } + Err(_) => { + // Timeout, check again if we should stop + continue; + } + } + } - // 
Final check if we failed during the execution if self.is_stopping() && !self.continue_on_error { return Err(eyre!("task sequence aborted due to failure")); }