diff --git a/.agents/scripts/supervisor/ai-lifecycle.sh b/.agents/scripts/supervisor/ai-lifecycle.sh index 9805dedc6..7a6cdc27f 100644 --- a/.agents/scripts/supervisor/ai-lifecycle.sh +++ b/.agents/scripts/supervisor/ai-lifecycle.sh @@ -466,16 +466,15 @@ execute_lifecycle_action() { return 1 fi - # Use GitHub API to update the branch (no local git needed) - if gh api "repos/${pr_repo_slug}/pulls/${pr_number}/update-branch" \ - -X PUT -f expected_head_sha="" 2>>"$SUPERVISOR_LOG"; then + # Use gh CLI to update the branch (handles sha automatically) + if gh pr update-branch "$pr_number" --repo "$pr_repo_slug" 2>>"$SUPERVISOR_LOG"; then log_success "ai-lifecycle: branch updated for $task_id — CI will re-run" update_task_status_tag "$task_id" "ci-running" "$repo_path" # Reset rebase counter since we used a different strategy db "$SUPERVISOR_DB" "UPDATE tasks SET rebase_attempts = 0 WHERE id = '$escaped_id';" 2>/dev/null || true return 0 else - log_warn "ai-lifecycle: GitHub API update-branch failed for $task_id — will try rebase next cycle" + log_warn "ai-lifecycle: update-branch failed for $task_id — will try rebase next cycle" update_task_status_tag "$task_id" "behind-main" "$repo_path" return 1 fi @@ -912,6 +911,10 @@ process_ai_lifecycle() { local merged_parents="" local repos_with_changes="" + local total_eligible=0 + total_eligible=$(printf '%s\n' "$eligible_tasks" | grep -c '.' 
|| true) # grep -c prints 0 itself on no match; only its exit status needs masking + log_info "ai-lifecycle: $total_eligible eligible tasks" + while IFS='|' read -r tid tstatus tpr trepo; do [[ -z "$tid" ]] && continue @@ -944,6 +947,7 @@ process_ai_lifecycle() { # Check if a merge happened local new_status new_status=$(db "$SUPERVISOR_DB" "SELECT status FROM tasks WHERE id = '$(sql_escape "$tid")';" 2>/dev/null || echo "") + log_info "ai-lifecycle: $tid → $new_status" case "$new_status" in merged | deploying | deployed) merged_count=$((merged_count + 1)) @@ -956,6 +960,8 @@ process_ai_lifecycle() { fi ;; esac + else + log_warn "ai-lifecycle: $tid failed (process_task_lifecycle returned non-zero)" fi # Track repos that had status tag changes diff --git a/.agents/scripts/supervisor/pulse.sh b/.agents/scripts/supervisor/pulse.sh index 70330256d..3c824764d 100755 --- a/.agents/scripts/supervisor/pulse.sh +++ b/.agents/scripts/supervisor/pulse.sh @@ -2641,16 +2641,21 @@ RULES: fi # Phase 4b2: Stale pr_review recovery (t1208) - # Tasks in 'pr_review' are processed by Phase 3 (process_post_pr_lifecycle) each - # pulse. However, if cmd_pr_lifecycle fails repeatedly or the PR is in an - # unexpected state, the task can get stuck in pr_review indefinitely. - # After SUPERVISOR_PR_REVIEW_STALE_SECONDS (default 3600 = 1h), force a - # re-attempt via cmd_pr_lifecycle. If that also fails, log a warning so the - # operator can investigate — do NOT auto-fail pr_review tasks since the PR - # may be legitimately waiting for CI or human review. - local pr_review_stale_seconds="${SUPERVISOR_PR_REVIEW_STALE_SECONDS:-3600}" - local stale_pr_review - stale_pr_review=$(db -separator '|' "$SUPERVISOR_DB" " + # When AI lifecycle is active, Phase 3 handles all pr_review tasks via + # process_ai_lifecycle — skip legacy cmd_pr_lifecycle to avoid clobbering + # AI decisions (e.g., marking tasks as "Merge failed" when the AI already + # decided to escalate or wait). 
+ if [[ "${SUPERVISOR_AI_LIFECYCLE:-true}" != "true" ]]; then + # Tasks in 'pr_review' are processed by Phase 3 (process_post_pr_lifecycle) each + # pulse. However, if cmd_pr_lifecycle fails repeatedly or the PR is in an + # unexpected state, the task can get stuck in pr_review indefinitely. + # After SUPERVISOR_PR_REVIEW_STALE_SECONDS (default 3600 = 1h), force a + # re-attempt via cmd_pr_lifecycle. If that also fails, log a warning so the + # operator can investigate — do NOT auto-fail pr_review tasks since the PR + # may be legitimately waiting for CI or human review. + local pr_review_stale_seconds="${SUPERVISOR_PR_REVIEW_STALE_SECONDS:-3600}" + local stale_pr_review + stale_pr_review=$(db -separator '|' "$SUPERVISOR_DB" " SELECT id, pr_url, updated_at FROM tasks WHERE status = 'pr_review' @@ -2658,28 +2663,29 @@ RULES: ORDER BY updated_at ASC; " 2>/dev/null || echo "") - if [[ -n "$stale_pr_review" ]]; then - local pr_review_recovered=0 - while IFS='|' read -r spr_id spr_pr_url spr_updated; do - [[ -n "$spr_id" ]] || continue - log_warn " Stale pr_review: $spr_id (last updated: ${spr_updated:-unknown}, >${pr_review_stale_seconds}s) — re-attempting lifecycle (t1208)" - if cmd_pr_lifecycle "$spr_id" 2>>"$SUPERVISOR_LOG"; then - local spr_new_status - spr_new_status=$(db "$SUPERVISOR_DB" "SELECT status FROM tasks WHERE id = '$(sql_escape "$spr_id")';" 2>/dev/null || echo "") - if [[ "$spr_new_status" != "pr_review" ]]; then - log_info " Phase 4b2: $spr_id advanced from pr_review → $spr_new_status" - pr_review_recovered=$((pr_review_recovered + 1)) + if [[ -n "$stale_pr_review" ]]; then + local pr_review_recovered=0 + while IFS='|' read -r spr_id spr_pr_url spr_updated; do + [[ -n "$spr_id" ]] || continue + log_warn " Stale pr_review: $spr_id (last updated: ${spr_updated:-unknown}, >${pr_review_stale_seconds}s) — re-attempting lifecycle (t1208)" + if cmd_pr_lifecycle "$spr_id" 2>>"$SUPERVISOR_LOG"; then + local spr_new_status + spr_new_status=$(db "$SUPERVISOR_DB" 
"SELECT status FROM tasks WHERE id = '$(sql_escape "$spr_id")';" 2>/dev/null || echo "") + if [[ "$spr_new_status" != "pr_review" ]]; then + log_info " Phase 4b2: $spr_id advanced from pr_review → $spr_new_status" + pr_review_recovered=$((pr_review_recovered + 1)) + else + log_warn " Phase 4b2: $spr_id still in pr_review after lifecycle attempt — may need manual review (PR: ${spr_pr_url:-none})" + fi else - log_warn " Phase 4b2: $spr_id still in pr_review after lifecycle attempt — may need manual review (PR: ${spr_pr_url:-none})" + log_warn " Phase 4b2: cmd_pr_lifecycle failed for stale $spr_id — will retry next pulse (PR: ${spr_pr_url:-none})" fi - else - log_warn " Phase 4b2: cmd_pr_lifecycle failed for stale $spr_id — will retry next pulse (PR: ${spr_pr_url:-none})" + done <<<"$stale_pr_review" + if [[ "$pr_review_recovered" -gt 0 ]]; then + log_info " Phase 4b2: $pr_review_recovered stale pr_review task(s) advanced" fi - done <<<"$stale_pr_review" - if [[ "$pr_review_recovered" -gt 0 ]]; then - log_info " Phase 4b2: $pr_review_recovered stale pr_review task(s) advanced" fi - fi + fi # End of Phase 4b2 legacy guard (SUPERVISOR_AI_LIFECYCLE != true) # Phase 4c: Cancel stale diagnostic subtasks whose parent is already resolved # Diagnostic tasks (diagnostic_of != NULL) become stale when the parent task