diff --git a/.agents/scripts/supervisor-helper.sh b/.agents/scripts/supervisor-helper.sh index 3b1a705e4..69c631c7b 100755 --- a/.agents/scripts/supervisor-helper.sh +++ b/.agents/scripts/supervisor-helper.sh @@ -245,6 +245,7 @@ readonly VALID_STATES="queued dispatched running evaluating retrying complete pr # shellcheck disable=SC2034 # Used by supervisor/state.sh readonly -a VALID_TRANSITIONS=( "queued:dispatched" + "queued:blocked" "queued:cancelled" "dispatched:running" "dispatched:failed" diff --git a/.agents/scripts/supervisor/dispatch.sh b/.agents/scripts/supervisor/dispatch.sh index df035d21b..67855eb3b 100755 --- a/.agents/scripts/supervisor/dispatch.sh +++ b/.agents/scripts/supervisor/dispatch.sh @@ -1528,14 +1528,16 @@ check_dispatch_dedup_guard() { local cooldown_secs="${SUPERVISOR_FAILURE_COOLDOWN_SECS:-600}" # 10 minutes default local max_consecutive="${SUPERVISOR_MAX_CONSECUTIVE_FAILURES:-2}" - # Rule 2: Block after max_consecutive identical failures + # Rule 2: Cancel after max_consecutive identical failures + # Note: the task is cancelled here rather than blocked. NOTE(review): VALID_TRANSITIONS also lists "queued:blocked" — confirm cancelled is still the intended target state. + # The task can be manually re-queued after investigation. 
if [[ "$consecutive_count" -ge "$max_consecutive" ]]; then local block_reason="Dispatch dedup guard: $consecutive_count consecutive identical failures (error: ${last_error:-unknown}) — manual intervention required (t1206)" - log_warn " $task_id: BLOCKED by dedup guard — $consecutive_count consecutive identical failures with error '${last_error:-unknown}'" - cmd_transition "$task_id" "blocked" --error "$block_reason" 2>/dev/null || true + log_warn " $task_id: CANCELLED by dedup guard — $consecutive_count consecutive identical failures with error '${last_error:-unknown}'" + cmd_transition "$task_id" "cancelled" --error "$block_reason" 2>/dev/null || true update_todo_on_blocked "$task_id" "$block_reason" 2>/dev/null || true - send_task_notification "$task_id" "blocked" "$block_reason" 2>/dev/null || true - store_failure_pattern "$task_id" "blocked" "$block_reason" "dispatch-dedup-guard" 2>/dev/null || true + send_task_notification "$task_id" "cancelled" "$block_reason" 2>/dev/null || true + store_failure_pattern "$task_id" "cancelled" "$block_reason" "dispatch-dedup-guard" 2>/dev/null || true return 1 fi diff --git a/.agents/scripts/supervisor/todo-sync.sh b/.agents/scripts/supervisor/todo-sync.sh index 37dccfb56..f2ede6642 100755 --- a/.agents/scripts/supervisor/todo-sync.sh +++ b/.agents/scripts/supervisor/todo-sync.sh @@ -1693,7 +1693,7 @@ cmd_reconcile_db_todo() { else log_info "Phase 7b: Transitioning $tid from $tstatus to complete (TODO.md shows [x])" cmd_transition "$tid" "complete" \ - --reason "Reconciled: TODO.md marked [x] but DB was $tstatus (t1001)" \ + --error "Reconciled: TODO.md marked [x] but DB was $tstatus (t1001)" \ 2>>"${SUPERVISOR_LOG:-/dev/null}" || { log_warn "Phase 7b: Failed to transition $tid to complete" continue @@ -1874,7 +1874,7 @@ cmd_reconcile_queue_dispatchability() { else log_info "Phase 0.6: $tid queued in DB but [x] in TODO.md — transitioning to complete" cmd_transition "$tid" "complete" \ - --reason "Reconciled: TODO.md marked [x] 
but DB was queued (t1180)" \ + --error "Reconciled: TODO.md marked [x] but DB was queued (t1180)" \ 2>>"${SUPERVISOR_LOG:-/dev/null}" || { log_warn "Phase 0.6: Failed to transition $tid to complete" continue @@ -1891,7 +1891,7 @@ cmd_reconcile_queue_dispatchability() { else log_info "Phase 0.6: $tid queued in DB but [-] in TODO.md — cancelling" cmd_transition "$tid" "cancelled" \ - --reason "Reconciled: TODO.md marked [-] but DB was queued (t1180)" \ + --error "Reconciled: TODO.md marked [-] but DB was queued (t1180)" \ 2>>"${SUPERVISOR_LOG:-/dev/null}" || { log_warn "Phase 0.6: Failed to cancel $tid" continue @@ -1950,7 +1950,7 @@ cmd_reconcile_queue_dispatchability() { else log_warn "Phase 0.6: $tid queued in DB but not dispatchable in TODO.md — cancelling phantom entry" cmd_transition "$tid" "cancelled" \ - --reason "Reconciled: queued in DB but TODO.md has no #auto-dispatch tag or Dispatch Queue entry (t1180)" \ + --error "Reconciled: queued in DB but TODO.md has no #auto-dispatch tag or Dispatch Queue entry (t1180)" \ 2>>"${SUPERVISOR_LOG:-/dev/null}" || { log_warn "Phase 0.6: Failed to cancel phantom $tid" continue