Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions .agents/scripts/supervisor/ai-lifecycle.sh
Original file line number Diff line number Diff line change
Expand Up @@ -466,16 +466,15 @@ execute_lifecycle_action() {
return 1
fi

# Use GitHub API to update the branch (no local git needed)
if gh api "repos/${pr_repo_slug}/pulls/${pr_number}/update-branch" \
-X PUT -f expected_head_sha="" 2>>"$SUPERVISOR_LOG"; then
# Use gh CLI to update the branch (handles sha automatically)
if gh pr update-branch "$pr_number" --repo "$pr_repo_slug" 2>>"$SUPERVISOR_LOG"; then
log_success "ai-lifecycle: branch updated for $task_id — CI will re-run"
update_task_status_tag "$task_id" "ci-running" "$repo_path"
# Reset rebase counter since we used a different strategy
db "$SUPERVISOR_DB" "UPDATE tasks SET rebase_attempts = 0 WHERE id = '$escaped_id';" 2>/dev/null || true
return 0
else
log_warn "ai-lifecycle: GitHub API update-branch failed for $task_id — will try rebase next cycle"
log_warn "ai-lifecycle: update-branch failed for $task_id — will try rebase next cycle"
update_task_status_tag "$task_id" "behind-main" "$repo_path"
return 1
fi
Expand Down Expand Up @@ -912,6 +911,10 @@ process_ai_lifecycle() {
local merged_parents=""
local repos_with_changes=""

local total_eligible=0
total_eligible=$(printf '%s\n' "$eligible_tasks" | grep -c '.' || echo "0")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This line-counting logic is buggy when $eligible_tasks is empty. When nothing matches, grep -c still prints 0 but exits with status 1, so the || echo "0" guard also runs and appends a second 0. As a result, $total_eligible is assigned a multi-line value (0\n0), which will cause problems in logs or arithmetic operations.

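A more robust way to count the non-empty lines in a variable is to feed it to grep -c via a here-string and guard with || true. grep -c already prints 0 when nothing matches; || true only neutralises its non-zero exit status, so the empty case yields a single 0.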

Suggested change
total_eligible=$(printf '%s\n' "$eligible_tasks" | grep -c '.' || echo "0")
total_eligible=$(grep -c . <<< "$eligible_tasks" || true)
References
  1. The style guide (line 13) requires using || true to guard commands that may fail under set -e, such as grep. The current implementation uses || echo "0", which causes incorrect behavior when grep finds no matches. (link)
  2. In shell scripts with 'set -e' enabled, use '|| true' to prevent the script from exiting when a command like 'grep' fails (e.g., no matches found). This ensures script continuity without suppressing error output.

log_info "ai-lifecycle: $total_eligible eligible tasks"

while IFS='|' read -r tid tstatus tpr trepo; do
[[ -z "$tid" ]] && continue

Expand Down Expand Up @@ -944,6 +947,7 @@ process_ai_lifecycle() {
# Check if a merge happened
local new_status
new_status=$(db "$SUPERVISOR_DB" "SELECT status FROM tasks WHERE id = '$(sql_escape "$tid")';" 2>/dev/null || echo "")
log_info "ai-lifecycle: $tid → $new_status"
case "$new_status" in
merged | deploying | deployed)
merged_count=$((merged_count + 1))
Expand All @@ -956,6 +960,8 @@ process_ai_lifecycle() {
fi
;;
esac
else
log_warn "ai-lifecycle: $tid failed (process_task_lifecycle returned non-zero)"
fi

# Track repos that had status tag changes
Expand Down
62 changes: 34 additions & 28 deletions .agents/scripts/supervisor/pulse.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2641,45 +2641,51 @@ RULES:
fi

# Phase 4b2: Stale pr_review recovery (t1208)
# Tasks in 'pr_review' are processed by Phase 3 (process_post_pr_lifecycle) each
# pulse. However, if cmd_pr_lifecycle fails repeatedly or the PR is in an
# unexpected state, the task can get stuck in pr_review indefinitely.
# After SUPERVISOR_PR_REVIEW_STALE_SECONDS (default 3600 = 1h), force a
# re-attempt via cmd_pr_lifecycle. If that also fails, log a warning so the
# operator can investigate — do NOT auto-fail pr_review tasks since the PR
# may be legitimately waiting for CI or human review.
local pr_review_stale_seconds="${SUPERVISOR_PR_REVIEW_STALE_SECONDS:-3600}"
local stale_pr_review
stale_pr_review=$(db -separator '|' "$SUPERVISOR_DB" "
# When AI lifecycle is active, Phase 3 handles all pr_review tasks via
# process_ai_lifecycle — skip legacy cmd_pr_lifecycle to avoid clobbering
# AI decisions (e.g., marking tasks as "Merge failed" when the AI already
# decided to escalate or wait).
if [[ "${SUPERVISOR_AI_LIFECYCLE:-true}" != "true" ]]; then
# Tasks in 'pr_review' are processed by Phase 3 (process_post_pr_lifecycle) each
# pulse. However, if cmd_pr_lifecycle fails repeatedly or the PR is in an
# unexpected state, the task can get stuck in pr_review indefinitely.
# After SUPERVISOR_PR_REVIEW_STALE_SECONDS (default 3600 = 1h), force a
# re-attempt via cmd_pr_lifecycle. If that also fails, log a warning so the
# operator can investigate — do NOT auto-fail pr_review tasks since the PR
# may be legitimately waiting for CI or human review.
local pr_review_stale_seconds="${SUPERVISOR_PR_REVIEW_STALE_SECONDS:-3600}"
local stale_pr_review
stale_pr_review=$(db -separator '|' "$SUPERVISOR_DB" "
SELECT id, pr_url, updated_at
FROM tasks
WHERE status = 'pr_review'
AND updated_at < strftime('%Y-%m-%dT%H:%M:%SZ', 'now', '-${pr_review_stale_seconds} seconds')
ORDER BY updated_at ASC;
" 2>/dev/null || echo "")

if [[ -n "$stale_pr_review" ]]; then
local pr_review_recovered=0
while IFS='|' read -r spr_id spr_pr_url spr_updated; do
[[ -n "$spr_id" ]] || continue
log_warn " Stale pr_review: $spr_id (last updated: ${spr_updated:-unknown}, >${pr_review_stale_seconds}s) — re-attempting lifecycle (t1208)"
if cmd_pr_lifecycle "$spr_id" 2>>"$SUPERVISOR_LOG"; then
local spr_new_status
spr_new_status=$(db "$SUPERVISOR_DB" "SELECT status FROM tasks WHERE id = '$(sql_escape "$spr_id")';" 2>/dev/null || echo "")
if [[ "$spr_new_status" != "pr_review" ]]; then
log_info " Phase 4b2: $spr_id advanced from pr_review → $spr_new_status"
pr_review_recovered=$((pr_review_recovered + 1))
if [[ -n "$stale_pr_review" ]]; then
local pr_review_recovered=0
while IFS='|' read -r spr_id spr_pr_url spr_updated; do
[[ -n "$spr_id" ]] || continue
log_warn " Stale pr_review: $spr_id (last updated: ${spr_updated:-unknown}, >${pr_review_stale_seconds}s) — re-attempting lifecycle (t1208)"
if cmd_pr_lifecycle "$spr_id" 2>>"$SUPERVISOR_LOG"; then
local spr_new_status
spr_new_status=$(db "$SUPERVISOR_DB" "SELECT status FROM tasks WHERE id = '$(sql_escape "$spr_id")';" 2>/dev/null || echo "")
if [[ "$spr_new_status" != "pr_review" ]]; then
log_info " Phase 4b2: $spr_id advanced from pr_review → $spr_new_status"
pr_review_recovered=$((pr_review_recovered + 1))
else
log_warn " Phase 4b2: $spr_id still in pr_review after lifecycle attempt — may need manual review (PR: ${spr_pr_url:-none})"
fi
else
log_warn " Phase 4b2: $spr_id still in pr_review after lifecycle attempt — may need manual review (PR: ${spr_pr_url:-none})"
log_warn " Phase 4b2: cmd_pr_lifecycle failed for stale $spr_id — will retry next pulse (PR: ${spr_pr_url:-none})"
fi
else
log_warn " Phase 4b2: cmd_pr_lifecycle failed for stale $spr_id — will retry next pulse (PR: ${spr_pr_url:-none})"
done <<<"$stale_pr_review"
if [[ "$pr_review_recovered" -gt 0 ]]; then
log_info " Phase 4b2: $pr_review_recovered stale pr_review task(s) advanced"
fi
done <<<"$stale_pr_review"
if [[ "$pr_review_recovered" -gt 0 ]]; then
log_info " Phase 4b2: $pr_review_recovered stale pr_review task(s) advanced"
fi
fi
fi # End of Phase 4b2 legacy guard (SUPERVISOR_AI_LIFECYCLE != true)

# Phase 4c: Cancel stale diagnostic subtasks whose parent is already resolved
# Diagnostic tasks (diagnostic_of != NULL) become stale when the parent task
Expand Down
Loading