diff --git a/.agents/scripts/supervisor-helper.sh b/.agents/scripts/supervisor-helper.sh index 31691073a..fea401675 100755 --- a/.agents/scripts/supervisor-helper.sh +++ b/.agents/scripts/supervisor-helper.sh @@ -4766,6 +4766,19 @@ extract_log_metadata() { fi echo "task_obsolete=$task_obsolete" + # GitHub auth failure detection (t198): workers that complete their work but + # can't push/create a PR because gh auth is expired. This is the #1 cause of + # clean_exit_no_signal (79% of cases in production logs). Without this check, + # the supervisor retries them 3x at Opus cost, each failing identically. + # Search the FULL log (not just tail) because auth failures appear in tool + # output mid-log, not at the end. This is safe because gh auth patterns are + # specific enough to avoid false positives from documentation content. + local gh_auth_failure="false" + if grep -qiE 'gh auth (status|login).*fail|authentication token.*expired|not logged in|try authenticating|gh: To use .* in a non-interactive context|could not authenticate|failed to authenticate' "$log_file" 2>/dev/null; then + gh_auth_failure="true" + fi + echo "gh_auth_failure=$gh_auth_failure" + # Task tool parallelism tracking (t217): detect whether the worker used the # Task tool (mcp_task) to spawn sub-agents for parallel work. This is a # heuristic quality signal — workers that parallelise independent subtasks @@ -5139,6 +5152,20 @@ evaluate_worker() { fi fi + # GitHub auth failure detection (t198): workers that complete their work but + # can't push/create a PR because gh auth is expired or invalid. This is the + # #1 cause of clean_exit_no_signal (79% of cases). Retrying is pointless — + # the auth issue persists across retries. Block immediately so the human can + # fix credentials, then reset the task. + if [[ "$meta_exit_code" == "0" && "$meta_signal" == "none" ]]; then + local meta_gh_auth_failure + meta_gh_auth_failure=$(_meta_get "gh_auth_failure" "false") + if [[ "$meta_gh_auth_failure" == "true" ]]; then + echo "blocked:gh_auth_expired" + return 0 + fi + fi + # Clean exit with no completion signal and no PR (checked DB + gh API above) # = likely incomplete. The agent finished cleanly but didn't emit a signal # and no PR was found. Retry (agent may have run out of context or hit a diff --git a/tests/test-dispatch-worktree-evaluate.sh b/tests/test-dispatch-worktree-evaluate.sh index ecc9c337d..8ea4b2906 100755 --- a/tests/test-dispatch-worktree-evaluate.sh +++ b/tests/test-dispatch-worktree-evaluate.sh @@ -834,6 +834,32 @@ else fail "Backend error in retry log should be backend_quota_error" "Got: $eval_result" fi +# ============================================================ +# SECTION: GitHub Auth Failure Detection (t198) +# ============================================================ +section "GitHub Auth Failure Detection (t198)" + +# Integration test: worker completes work but gh auth is expired +sup add integ-t198b --repo "$TEST_REPO" --description "GH auth failure test" --no-issue >/dev/null +sup transition integ-t198b dispatched >/dev/null +sup transition integ-t198b running >/dev/null + +create_log "integ-t198b" 'WORKER_STARTED task_id=integ-t198b pid=12345 timestamp=2026-02-09T03:00:00Z +{"type":"step_start","timestamp":1770606000000,"part":{"type":"step-start"}} +{"type":"text","timestamp":1770606100000,"part":{"type":"text","text":"All implementation complete. Files created and committed."}} +{"type":"tool_use","timestamp":1770606200000,"part":{"type":"tool","tool":"bash","state":{"status":"completed","input":{"command":"gh auth status"},"output":"You are not logged in to any GitHub hosts. Run gh auth login to authenticate.","metadata":{"exit":1}}}} +{"type":"text","timestamp":1770606300000,"part":{"type":"text","text":"GitHub authentication has expired. Cannot create PR."}} +{"type":"step_finish","timestamp":1770606400000,"part":{"type":"step-finish","reason":"stop"}} +EXIT:0' >/dev/null + +sup transition integ-t198b evaluating >/dev/null +eval_result=$(sup evaluate integ-t198b --no-ai 2>&1 | grep "^Verdict:" || echo "") +if echo "$eval_result" | grep -q "blocked.*gh_auth_expired"; then + pass "Worker + gh auth failure -> blocked:gh_auth_expired (t198)" +else + fail "Worker with gh auth failure should be blocked:gh_auth_expired" "Got: $eval_result" +fi + # ============================================================ # SECTION 10: Concurrent Worktrees (parallel tasks) # ============================================================ diff --git a/tests/test-supervisor-state-machine.sh b/tests/test-supervisor-state-machine.sh index b694b67c4..8bc88aa32 100644 --- a/tests/test-supervisor-state-machine.sh +++ b/tests/test-supervisor-state-machine.sh @@ -1099,6 +1099,37 @@ else fail "Normal incomplete exit should still be clean_exit_no_signal" "Got: $eval_result" fi +# Test: GitHub auth failure -> blocked:gh_auth_expired (t198) +# Workers that complete work but can't push due to expired gh auth should be +# blocked immediately, not retried. This is the #1 cause of wasted retries. +create_eval_task "eval-t198e" 'WORKER_STARTED task_id=eval-t198e pid=12345 timestamp=2026-02-09T03:00:00Z +{"type":"step_start","timestamp":1770606000000,"part":{"type":"step-start"}} +{"type":"text","timestamp":1770606100000,"part":{"type":"text","text":"Implementation complete. All files created and committed."}} +{"type":"tool_use","timestamp":1770606200000,"part":{"type":"tool","tool":"bash","state":{"status":"completed","input":{"command":"gh auth status"},"output":"gh auth status\nYou are not logged in to any GitHub hosts. Run gh auth login to authenticate.","metadata":{"exit":1}}}} +{"type":"text","timestamp":1770606300000,"part":{"type":"text","text":"GitHub authentication token has expired. Cannot push or create PR. The implementation is complete locally but needs gh auth login to proceed."}} +{"type":"step_finish","timestamp":1770606400000,"part":{"type":"step-finish","reason":"stop"}} +EXIT:0' +eval_result=$(sup evaluate eval-t198e --no-ai 2>&1 | grep "^Verdict:" || echo "") +if echo "$eval_result" | grep -q "blocked.*gh_auth_expired"; then + pass "Exit 0 + gh auth failure -> blocked:gh_auth_expired (not retried)" +else + fail "Exit 0 + gh auth failure should be blocked:gh_auth_expired" "Got: $eval_result" +fi + +# Test: gh auth failure with different wording (try authenticating) +create_eval_task "eval-t198f" 'WORKER_STARTED task_id=eval-t198f pid=12345 timestamp=2026-02-09T03:00:00Z +{"type":"step_start","timestamp":1770606000000,"part":{"type":"step-start"}} +{"type":"tool_use","timestamp":1770606100000,"part":{"type":"tool","tool":"bash","state":{"status":"completed","input":{"command":"git push"},"output":"remote: Permission denied. try authenticating with gh auth login","metadata":{"exit":1}}}} +{"type":"text","timestamp":1770606200000,"part":{"type":"text","text":"Push failed. Need to authenticate with GitHub."}} +{"type":"step_finish","timestamp":1770606300000,"part":{"type":"step-finish","reason":"stop"}} +EXIT:0' +eval_result=$(sup evaluate eval-t198f --no-ai 2>&1 | grep "^Verdict:" || echo "") +if echo "$eval_result" | grep -q "blocked.*gh_auth_expired"; then + pass "Exit 0 + 'try authenticating' -> blocked:gh_auth_expired" +else + fail "Exit 0 + 'try authenticating' should be blocked:gh_auth_expired" "Got: $eval_result" +fi + # ============================================================ # SECTION 7: Worktree Path Integrity # ============================================================