Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 43 additions & 5 deletions .agents/scripts/supervisor-helper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4419,6 +4419,14 @@ extract_log_metadata() {
echo "log_bytes=$(wc -c < "$log_file" | tr -d ' ')"
echo "log_lines=$(wc -l < "$log_file" | tr -d ' ')"

# Content lines: exclude REPROMPT METADATA header (t198). Retry logs include
# an 8-line metadata block that inflates log_lines, causing the backend error
# threshold (< 10 lines) to miss short error-only logs. content_lines counts
# every line that is not one of the metadata header lines (blank lines are
# not excluded by the pattern, so they still count).
#
# grep -c always prints a count but exits 1 when that count is 0, so a
# "|| echo 0" fallback would append a second "0" and produce "0\n0". The exit
# status is swallowed instead, and the default is applied only when grep
# printed nothing at all (e.g. the log file could not be read).
# -E is used because "\|" alternation is not part of POSIX basic regexes.
local content_lines
content_lines=$(grep -cvE '^=== (REPROMPT METADATA|END REPROMPT METADATA)|^task_id=|^timestamp=|^retry=|^work_dir=|^previous_error=|^fresh_worktree=' "$log_file" 2>/dev/null || true)
echo "content_lines=${content_lines:-0}"

# Worker startup sentinel (t183)
if grep -q 'WORKER_STARTED' "$log_file" 2>/dev/null; then
echo "worker_started=true"
Expand Down Expand Up @@ -4459,6 +4467,18 @@ extract_log_metadata() {
fi
echo "pr_url=${final_pr_url}"

# Task obsolete detection (t198): workers that determine a task is already
# done or obsolete exit cleanly with no signal and no PR. Without this,
# the supervisor retries them as clean_exit_no_signal, wasting retries.
# Only check the final text entry (authoritative, same as PR URL extraction).
#
# The text is fed to grep through a here-string rather than "echo ... |":
# echo would treat a value such as "-n" as an option, and a pipeline can
# report failure when grep -q exits before the writer has finished a large
# input (relevant if pipefail is enabled - not visible from this hunk).
# Emits task_obsolete=true when the line matches, task_obsolete=false otherwise.
local task_obsolete="false"
if [[ -n "$last_text_line" ]] &&
  grep -qiE 'already done|already complete[d]?|task.*(obsolete|no longer needed)|no (changes|PR) needed|nothing to (change|fix|do)|no work (needed|required|to do)' <<<"$last_text_line"; then
  task_obsolete="true"
fi
echo "task_obsolete=$task_obsolete"

# Exit code
local exit_line
exit_line=$(grep '^EXIT:' "$log_file" 2>/dev/null | tail -1 || true)
Expand Down Expand Up @@ -4775,18 +4795,36 @@ evaluate_worker() {

# Backend infrastructure error with EXIT:0 (t095-diag-1): CLI wrappers like
# OpenCode exit 0 even when the backend rejects the request (quota exceeded,
# backend down). A short log (< 10 lines) with backend errors means the
# worker never started - this is NOT content discussion, it's a real failure.
# backend down). A short log with backend errors means the worker never
# started - this is NOT content discussion, it's a real failure.
# Must be checked BEFORE clean_exit_no_signal to avoid wasting retries.
# (t198): Use content_lines instead of log_lines to exclude REPROMPT METADATA
# headers that inflate the line count in retry logs (8-line header caused
# 12-line logs to miss the < 10 threshold).
if [[ "$meta_exit_code" == "0" && "$meta_signal" == "none" ]]; then
local meta_log_lines
meta_log_lines=$(_meta_get "log_lines" "0")
if [[ "$meta_backend_error_count" -gt 0 && "$meta_log_lines" -lt 10 ]]; then
local meta_content_lines
meta_content_lines=$(_meta_get "content_lines" "0")
if [[ "$meta_backend_error_count" -gt 0 && "$meta_content_lines" -lt 10 ]]; then
echo "retry:backend_quota_error"
return 0
fi
fi

# Task obsolete verdict (t198): a worker that concludes the task is already
# done or obsolete finishes with EXIT:0, no signal and no PR. Left unhandled,
# it would be retried as clean_exit_no_signal, burning retry attempts on work
# that will never produce a PR.
# The task_obsolete flag is read from the extracted log metadata; it is
# derived from the final "type":"text" entry (authoritative) containing
# explicit "already done" / "no changes needed" language from the worker.
# The metadata is only consulted for a clean exit with no signal.
local meta_task_obsolete="false"
if [[ "$meta_exit_code" == "0" && "$meta_signal" == "none" ]]; then
  meta_task_obsolete=$(_meta_get "task_obsolete" "false")
fi
if [[ "$meta_task_obsolete" == "true" ]]; then
  echo "complete:task_obsolete"
  return 0
fi

# Clean exit with no completion signal and no PR (checked DB + gh API above)
# = likely incomplete. The agent finished cleanly but didn't emit a signal
# and no PR was found. Retry (agent may have run out of context or hit a
Expand Down
31 changes: 31 additions & 0 deletions tests/test-dispatch-worktree-evaluate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,37 @@ else
fail "Exit 0 with error strings should NOT be blocked" "Got: $eval_result"
fi

# ============================================================
# SECTION: Backend Error in Retry Logs (t198)
# ============================================================
section "Backend Error in Retry Logs (t198)"

# Integration test (t198): a retry log whose only worker output is a backend
# error, preceded by a REPROMPT METADATA header, must be evaluated as
# backend_quota_error.
sup add integ-t198a --repo "$TEST_REPO" --description "Backend error retry test" --no-issue >/dev/null
sup transition integ-t198a dispatched >/dev/null
sup transition integ-t198a running >/dev/null

# The fixture is single-quoted, so "pid=$$" is written to the log literally.
create_log "integ-t198a" '=== REPROMPT METADATA (t183) ===
task_id=integ-t198a
timestamp=2026-02-09T23:30:27Z
retry=1/3
work_dir=/tmp/test
previous_error=clean_exit_no_signal
fresh_worktree=true
=== END REPROMPT METADATA ===

WORKER_STARTED task_id=integ-t198a retry=1 pid=$$ timestamp=2026-02-09T23:30:27Z
{"type":"error","error":{"message":"Error: All Antigravity endpoints failed"}}
EXIT:0' >/dev/null

sup transition integ-t198a evaluating >/dev/null

# Keep only the verdict line(s); an absent verdict leaves the variable empty.
eval_result=$(sup evaluate integ-t198a --no-ai 2>&1 | grep "^Verdict:" || true)
if ! grep -q "retry.*backend_quota_error" <<<"$eval_result"; then
  fail "Backend error in retry log should be backend_quota_error" "Got: $eval_result"
else
  pass "Backend error + REPROMPT METADATA -> backend_quota_error (t198)"
fi

# ============================================================
# SECTION 10: Concurrent Worktrees (parallel tasks)
# ============================================================
Expand Down
67 changes: 67 additions & 0 deletions tests/test-supervisor-state-machine.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1032,6 +1032,73 @@ else
fi
fi

# ============================================================
# SECTION: Backend Error in Retry Logs (t198 - content_lines fix)
# ============================================================
section "Backend Error in Retry Logs (t198)"

# Test: a backend error behind a REPROMPT METADATA header must be reported as
# backend_quota_error rather than clean_exit_no_signal. With the header the
# log has 12 lines, but only 4 of them are non-metadata content lines.
# The fixture is single-quoted, so "pid=$$" is written to the log literally.
create_eval_task "eval-t198a" '=== REPROMPT METADATA (t183) ===
task_id=eval-t198a
timestamp=2026-02-09T23:30:27Z
retry=3/5
work_dir=/tmp/test
previous_error=clean_exit_no_signal
fresh_worktree=true
=== END REPROMPT METADATA ===

WORKER_STARTED task_id=eval-t198a retry=3 pid=$$ timestamp=2026-02-09T23:30:27Z
{"type":"error","timestamp":1770679838445,"error":{"name":"UnknownError","data":{"message":"Error: All Antigravity endpoints failed"}}}
EXIT:0'

# Keep only the verdict line(s); an absent verdict leaves the variable empty.
eval_result=$(sup evaluate eval-t198a --no-ai 2>&1 | grep "^Verdict:" || true)
if ! grep -q "retry.*backend_quota_error" <<<"$eval_result"; then
  fail "Backend error in retry log should be backend_quota_error, not clean_exit_no_signal" "Got: $eval_result"
else
  pass "Backend error + REPROMPT METADATA -> retry:backend_quota_error (t198 fix)"
fi

# Test: task obsolete detection. The worker's final text entry says the task
# is "already done", so the verdict must be complete:task_obsolete.
create_eval_task "eval-t198b" 'WORKER_STARTED task_id=eval-t198b pid=12345 timestamp=2026-02-09T03:00:00Z
{"type":"step_start","timestamp":1770606000000,"part":{"type":"step-start"}}
{"type":"tool_use","timestamp":1770606100000,"part":{"type":"tool_use","name":"bash","input":"git status"}}
{"type":"text","timestamp":1770606693412,"part":{"type":"text","text":"**TASK ALREADY DONE — exiting cleanly.** Both files are already valid JSON with no corruption. No PR needed — there are no changes to make."}}
{"type":"step_finish","timestamp":1770606693614,"part":{"type":"step-finish","reason":"stop"}}
EXIT:0'

# Keep only the verdict line(s); an absent verdict leaves the variable empty.
eval_result=$(sup evaluate eval-t198b --no-ai 2>&1 | grep "^Verdict:" || true)
if ! grep -q "complete.*task_obsolete" <<<"$eval_result"; then
  fail "Worker saying 'already done' should be complete:task_obsolete" "Got: $eval_result"
else
  pass "Worker says 'already done' -> complete:task_obsolete (t198 fix)"
fi

# Test: task obsolete detection with the "no changes needed" phrasing in the
# worker's final text entry; the verdict must be complete:task_obsolete.
create_eval_task "eval-t198c" 'WORKER_STARTED task_id=eval-t198c pid=12345 timestamp=2026-02-09T03:00:00Z
{"type":"step_start","timestamp":1770606000000,"part":{"type":"step-start"}}
{"type":"text","timestamp":1770606693412,"part":{"type":"text","text":"Task t135.5 is already done. The investigation confirms no changes needed. All specified artifacts are untracked and both directories are gitignored. No PR needed."}}
{"type":"step_finish","timestamp":1770606693614,"part":{"type":"step-finish","reason":"stop"}}
EXIT:0'

# Keep only the verdict line(s); an absent verdict leaves the variable empty.
eval_result=$(sup evaluate eval-t198c --no-ai 2>&1 | grep "^Verdict:" || true)
if ! grep -q "complete.*task_obsolete" <<<"$eval_result"; then
  fail "Worker saying 'no changes needed' should be complete:task_obsolete" "Got: $eval_result"
else
  pass "Worker says 'no changes needed' -> complete:task_obsolete (t198 fix)"
fi

# Test: regression guard. A worker that exits cleanly without claiming the
# task is done must still be evaluated as retry:clean_exit_no_signal.
create_eval_task "eval-t198d" 'WORKER_STARTED task_id=eval-t198d pid=12345 timestamp=2026-02-09T03:00:00Z
{"type":"step_start","timestamp":1770606000000,"part":{"type":"step-start"}}
{"type":"text","timestamp":1770606693412,"part":{"type":"text","text":"I started working on the task but ran out of context. The implementation is partially complete."}}
{"type":"step_finish","timestamp":1770606693614,"part":{"type":"step-finish","reason":"stop"}}
EXIT:0'

# Keep only the verdict line(s); an absent verdict leaves the variable empty.
eval_result=$(sup evaluate eval-t198d --no-ai 2>&1 | grep "^Verdict:" || true)
if ! grep -q "retry.*clean_exit_no_signal" <<<"$eval_result"; then
  fail "Normal incomplete exit should still be clean_exit_no_signal" "Got: $eval_result"
else
  pass "Normal incomplete exit -> retry:clean_exit_no_signal (unchanged)"
fi

# ============================================================
# SECTION 7: Worktree Path Integrity
# ============================================================
Expand Down