diff --git a/.agents/scripts/supervisor/ai-lifecycle.sh b/.agents/scripts/supervisor/ai-lifecycle.sh index 9805dedc6a..dfa5e949a8 100644 --- a/.agents/scripts/supervisor/ai-lifecycle.sh +++ b/.agents/scripts/supervisor/ai-lifecycle.sh @@ -912,6 +912,10 @@ process_ai_lifecycle() { local merged_parents="" local repos_with_changes="" + local total_eligible=0 + total_eligible=$(printf '%s\n' "$eligible_tasks" | grep -c '.' || true) # grep -c prints 0 itself on no match; only its non-zero exit status is masked + log_info "ai-lifecycle: $total_eligible eligible tasks" + while IFS='|' read -r tid tstatus tpr trepo; do [[ -z "$tid" ]] && continue @@ -944,6 +948,7 @@ process_ai_lifecycle() { # Check if a merge happened local new_status new_status=$(db "$SUPERVISOR_DB" "SELECT status FROM tasks WHERE id = '$(sql_escape "$tid")';" 2>/dev/null || echo "") + log_info "ai-lifecycle: $tid → $new_status" case "$new_status" in merged | deploying | deployed) merged_count=$((merged_count + 1)) @@ -956,6 +961,8 @@ process_ai_lifecycle() { fi ;; esac + else + log_warn "ai-lifecycle: $tid failed (process_task_lifecycle returned non-zero)" fi # Track repos that had status tag changes diff --git a/.agents/scripts/supervisor/pulse.sh b/.agents/scripts/supervisor/pulse.sh index 70330256d3..3c824764d6 100755 --- a/.agents/scripts/supervisor/pulse.sh +++ b/.agents/scripts/supervisor/pulse.sh @@ -2641,16 +2641,21 @@ RULES: fi # Phase 4b2: Stale pr_review recovery (t1208) - # Tasks in 'pr_review' are processed by Phase 3 (process_post_pr_lifecycle) each - # pulse. However, if cmd_pr_lifecycle fails repeatedly or the PR is in an - # unexpected state, the task can get stuck in pr_review indefinitely. - # After SUPERVISOR_PR_REVIEW_STALE_SECONDS (default 3600 = 1h), force a - # re-attempt via cmd_pr_lifecycle. If that also fails, log a warning so the - # operator can investigate — do NOT auto-fail pr_review tasks since the PR - # may be legitimately waiting for CI or human review. 
- local pr_review_stale_seconds="${SUPERVISOR_PR_REVIEW_STALE_SECONDS:-3600}" - local stale_pr_review - stale_pr_review=$(db -separator '|' "$SUPERVISOR_DB" " + # When AI lifecycle is active, Phase 3 handles all pr_review tasks via + # process_ai_lifecycle — skip legacy cmd_pr_lifecycle to avoid clobbering + # AI decisions (e.g., marking tasks as "Merge failed" when the AI already + # decided to escalate or wait). + if [[ "${SUPERVISOR_AI_LIFECYCLE:-true}" != "true" ]]; then + # Tasks in 'pr_review' are processed by Phase 3 (process_post_pr_lifecycle) each + # pulse. However, if cmd_pr_lifecycle fails repeatedly or the PR is in an + # unexpected state, the task can get stuck in pr_review indefinitely. + # After SUPERVISOR_PR_REVIEW_STALE_SECONDS (default 3600 = 1h), force a + # re-attempt via cmd_pr_lifecycle. If that also fails, log a warning so the + # operator can investigate — do NOT auto-fail pr_review tasks since the PR + # may be legitimately waiting for CI or human review. + local pr_review_stale_seconds="${SUPERVISOR_PR_REVIEW_STALE_SECONDS:-3600}" + local stale_pr_review + stale_pr_review=$(db -separator '|' "$SUPERVISOR_DB" " SELECT id, pr_url, updated_at FROM tasks WHERE status = 'pr_review' @@ -2658,28 +2663,29 @@ RULES: ORDER BY updated_at ASC; " 2>/dev/null || echo "") - if [[ -n "$stale_pr_review" ]]; then - local pr_review_recovered=0 - while IFS='|' read -r spr_id spr_pr_url spr_updated; do - [[ -n "$spr_id" ]] || continue - log_warn " Stale pr_review: $spr_id (last updated: ${spr_updated:-unknown}, >${pr_review_stale_seconds}s) — re-attempting lifecycle (t1208)" - if cmd_pr_lifecycle "$spr_id" 2>>"$SUPERVISOR_LOG"; then - local spr_new_status - spr_new_status=$(db "$SUPERVISOR_DB" "SELECT status FROM tasks WHERE id = '$(sql_escape "$spr_id")';" 2>/dev/null || echo "") - if [[ "$spr_new_status" != "pr_review" ]]; then - log_info " Phase 4b2: $spr_id advanced from pr_review → $spr_new_status" - pr_review_recovered=$((pr_review_recovered + 1)) + if 
[[ -n "$stale_pr_review" ]]; then + local pr_review_recovered=0 + while IFS='|' read -r spr_id spr_pr_url spr_updated; do + [[ -n "$spr_id" ]] || continue + log_warn " Stale pr_review: $spr_id (last updated: ${spr_updated:-unknown}, >${pr_review_stale_seconds}s) — re-attempting lifecycle (t1208)" + if cmd_pr_lifecycle "$spr_id" 2>>"$SUPERVISOR_LOG"; then + local spr_new_status + spr_new_status=$(db "$SUPERVISOR_DB" "SELECT status FROM tasks WHERE id = '$(sql_escape "$spr_id")';" 2>/dev/null || echo "") + if [[ "$spr_new_status" != "pr_review" ]]; then + log_info " Phase 4b2: $spr_id advanced from pr_review → $spr_new_status" + pr_review_recovered=$((pr_review_recovered + 1)) + else + log_warn " Phase 4b2: $spr_id still in pr_review after lifecycle attempt — may need manual review (PR: ${spr_pr_url:-none})" + fi else - log_warn " Phase 4b2: $spr_id still in pr_review after lifecycle attempt — may need manual review (PR: ${spr_pr_url:-none})" + log_warn " Phase 4b2: cmd_pr_lifecycle failed for stale $spr_id — will retry next pulse (PR: ${spr_pr_url:-none})" fi - else - log_warn " Phase 4b2: cmd_pr_lifecycle failed for stale $spr_id — will retry next pulse (PR: ${spr_pr_url:-none})" + done <<<"$stale_pr_review" + if [[ "$pr_review_recovered" -gt 0 ]]; then + log_info " Phase 4b2: $pr_review_recovered stale pr_review task(s) advanced" fi - done <<<"$stale_pr_review" - if [[ "$pr_review_recovered" -gt 0 ]]; then - log_info " Phase 4b2: $pr_review_recovered stale pr_review task(s) advanced" fi - fi + fi # End of Phase 4b2 legacy guard (SUPERVISOR_AI_LIFECYCLE != true) # Phase 4c: Cancel stale diagnostic subtasks whose parent is already resolved # Diagnostic tasks (diagnostic_of != NULL) become stale when the parent task