Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 34 additions & 28 deletions .agents/scripts/supervisor/pulse.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2641,45 +2641,51 @@ RULES:
fi

# Phase 4b2: Stale pr_review recovery (t1208)
# Tasks in 'pr_review' are processed by Phase 3 (process_post_pr_lifecycle) each
# pulse. However, if cmd_pr_lifecycle fails repeatedly or the PR is in an
# unexpected state, the task can get stuck in pr_review indefinitely.
# After SUPERVISOR_PR_REVIEW_STALE_SECONDS (default 3600 = 1h), force a
# re-attempt via cmd_pr_lifecycle. If that also fails, log a warning so the
# operator can investigate — do NOT auto-fail pr_review tasks since the PR
# may be legitimately waiting for CI or human review.
local pr_review_stale_seconds="${SUPERVISOR_PR_REVIEW_STALE_SECONDS:-3600}"
local stale_pr_review
stale_pr_review=$(db -separator '|' "$SUPERVISOR_DB" "
# When AI lifecycle is active, Phase 3 handles all pr_review tasks via
# process_ai_lifecycle — skip legacy cmd_pr_lifecycle to avoid clobbering
# AI decisions (e.g., marking tasks as "Merge failed" when the AI already
# decided to escalate or wait).
if [[ "${SUPERVISOR_AI_LIFECYCLE:-true}" != "true" ]]; then
# Tasks in 'pr_review' are processed by Phase 3 (process_post_pr_lifecycle) each
# pulse. However, if cmd_pr_lifecycle fails repeatedly or the PR is in an
# unexpected state, the task can get stuck in pr_review indefinitely.
# After SUPERVISOR_PR_REVIEW_STALE_SECONDS (default 3600 = 1h), force a
# re-attempt via cmd_pr_lifecycle. If that also fails, log a warning so the
# operator can investigate — do NOT auto-fail pr_review tasks since the PR
# may be legitimately waiting for CI or human review.
local pr_review_stale_seconds="${SUPERVISOR_PR_REVIEW_STALE_SECONDS:-3600}"
local stale_pr_review
stale_pr_review=$(db -separator '|' "$SUPERVISOR_DB" "
SELECT id, pr_url, updated_at
FROM tasks
WHERE status = 'pr_review'
AND updated_at < strftime('%Y-%m-%dT%H:%M:%SZ', 'now', '-${pr_review_stale_seconds} seconds')
ORDER BY updated_at ASC;
" 2>/dev/null || echo "")

if [[ -n "$stale_pr_review" ]]; then
local pr_review_recovered=0
while IFS='|' read -r spr_id spr_pr_url spr_updated; do
[[ -n "$spr_id" ]] || continue
log_warn " Stale pr_review: $spr_id (last updated: ${spr_updated:-unknown}, >${pr_review_stale_seconds}s) — re-attempting lifecycle (t1208)"
if cmd_pr_lifecycle "$spr_id" 2>>"$SUPERVISOR_LOG"; then
local spr_new_status
spr_new_status=$(db "$SUPERVISOR_DB" "SELECT status FROM tasks WHERE id = '$(sql_escape "$spr_id")';" 2>/dev/null || echo "")
if [[ "$spr_new_status" != "pr_review" ]]; then
log_info " Phase 4b2: $spr_id advanced from pr_review → $spr_new_status"
pr_review_recovered=$((pr_review_recovered + 1))
if [[ -n "$stale_pr_review" ]]; then
local pr_review_recovered=0
while IFS='|' read -r spr_id spr_pr_url spr_updated; do
[[ -n "$spr_id" ]] || continue
log_warn " Stale pr_review: $spr_id (last updated: ${spr_updated:-unknown}, >${pr_review_stale_seconds}s) — re-attempting lifecycle (t1208)"
if cmd_pr_lifecycle "$spr_id" 2>>"$SUPERVISOR_LOG"; then
local spr_new_status
spr_new_status=$(db "$SUPERVISOR_DB" "SELECT status FROM tasks WHERE id = '$(sql_escape "$spr_id")';" 2>/dev/null || echo "")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

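Avoid blanket suppression of errors with `2>/dev/null`. If the database query fails, the error should remain visible for debugging — either let stderr through (as in the suggested change below) or append it to the supervisor log with `2>>"$SUPERVISOR_LOG"`, as the `cmd_pr_lifecycle` call in this same hunk already does.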

Suggested change
spr_new_status=$(db "$SUPERVISOR_DB" "SELECT status FROM tasks WHERE id = '$(sql_escape "$spr_id")';" 2>/dev/null || echo "")
spr_new_status=$(db "$SUPERVISOR_DB" "SELECT status FROM tasks WHERE id = '$(sql_escape "$spr_id")';" || echo "")
References
  1. Avoid using '2>/dev/null' for blanket suppression of command errors in shell scripts to ensure that authentication, syntax, or system issues remain visible for debugging.

if [[ "$spr_new_status" != "pr_review" ]]; then
log_info " Phase 4b2: $spr_id advanced from pr_review → $spr_new_status"
pr_review_recovered=$((pr_review_recovered + 1))
else
log_warn " Phase 4b2: $spr_id still in pr_review after lifecycle attempt — may need manual review (PR: ${spr_pr_url:-none})"
fi
else
log_warn " Phase 4b2: $spr_id still in pr_review after lifecycle attempt — may need manual review (PR: ${spr_pr_url:-none})"
log_warn " Phase 4b2: cmd_pr_lifecycle failed for stale $spr_id — will retry next pulse (PR: ${spr_pr_url:-none})"
fi
else
log_warn " Phase 4b2: cmd_pr_lifecycle failed for stale $spr_id — will retry next pulse (PR: ${spr_pr_url:-none})"
done <<<"$stale_pr_review"
if [[ "$pr_review_recovered" -gt 0 ]]; then
log_info " Phase 4b2: $pr_review_recovered stale pr_review task(s) advanced"
fi
done <<<"$stale_pr_review"
if [[ "$pr_review_recovered" -gt 0 ]]; then
log_info " Phase 4b2: $pr_review_recovered stale pr_review task(s) advanced"
fi
fi
fi # End of Phase 4b2 legacy guard (SUPERVISOR_AI_LIFECYCLE != true)

# Phase 4c: Cancel stale diagnostic subtasks whose parent is already resolved
# Diagnostic tasks (diagnostic_of != NULL) become stale when the parent task
Expand Down
Loading