marcusquinn · marcusquinn · Feb 18, 2026 · Feb 18, 2026 · gemini-code-assist · Feb 18, 2026
diff --git a/.agents/scripts/supervisor/ai-actions.sh b/.agents/scripts/supervisor/ai-actions.sh
@@ -333,10 +333,8 @@ validate_action_fields() {
 			echo "missing required field: task_id"
 			return 0
 		fi
-		if [[ -z "$new_priority" ]]; then
-			echo "missing required field: new_priority"
-			return 0
-		fi
+		# new_priority is no longer strictly required — the executor infers it
+		# from reasoning text if missing (see _exec_adjust_priority)
 		;;
 	close_verified)
 		local issue_number pr_number
@@ -715,9 +713,25 @@ _exec_adjust_priority() {
 
 	local task_id new_priority reasoning
 	task_id=$(printf '%s' "$action" | jq -r '.task_id')
-	new_priority=$(printf '%s' "$action" | jq -r '.new_priority')
+	new_priority=$(printf '%s' "$action" | jq -r '.new_priority // empty')
 	reasoning=$(printf '%s' "$action" | jq -r '.reasoning // "No reasoning provided"')
 
+	# Infer priority from reasoning if the AI omitted the field (common pattern —
+	# the AI has omitted new_priority in 13+ actions across 5+ cycles)
+	if [[ -z "$new_priority" || "$new_priority" == "null" ]]; then
+		if printf '%s' "$reasoning" | grep -qi 'critical\|urgent\|blocker\|blocking'; then
+			new_priority="critical"
+		elif printf '%s' "$reasoning" | grep -qi 'high\|important\|prioriti'; then
+			new_priority="high"
+		elif printf '%s' "$reasoning" | grep -qi 'low\|minor\|defer'; then
+			new_priority="low"
+		else
+			# Default to high — the AI is recommending a change, usually an escalation
+			new_priority="high"
+		fi
+		log_warn "AI Actions: adjust_priority inferred new_priority='$new_priority' from reasoning (field was missing)"
+	fi
+
 	# Find the task's GitHub issue number
 	local issue_number=""
 	if declare -f find_task_issue_number &>/dev/null; then

diff --git a/.agents/scripts/supervisor/ai-reason.sh b/.agents/scripts/supervisor/ai-reason.sh
@@ -282,10 +282,22 @@ ${user_prompt}"
 
 	if [[ -z "$action_plan" || "$action_plan" == "null" ]]; then
 		log_warn "AI Reasoning: no parseable action plan in response"
+		# Debug diagnostics for intermittent parse failures
+		local response_len json_block_count first_bytes last_bytes
+		response_len=$(printf '%s' "$ai_result" | wc -c | tr -d ' ')
+		json_block_count=$(printf '%s' "$ai_result" | grep -c '^```json' || echo 0)
+		first_bytes=$(printf '%s' "$ai_result" | head -c 100 | tr '\n' ' ')
+		last_bytes=$(printf '%s' "$ai_result" | tail -c 100 | tr '\n' ' ')
 		{
 			echo "## Parsing Result"
 			echo ""
 			echo "Status: FAILED - no parseable JSON action plan"
+			echo ""
+			echo "### Debug Diagnostics"
+			echo "- Response length: $response_len bytes"
+			echo "- \`\`\`json blocks found: $json_block_count"
+			echo "- First 100 bytes: \`$first_bytes\`"
+			echo "- Last 100 bytes: \`$last_bytes\`"
 		} >>"$reason_log"
 		echo '{"error":"no_action_plan","actions":[]}'
 		_release_ai_lock
@@ -517,6 +529,30 @@ extract_action_plan() {
 		fi
 	fi
 
+	# Try 5: Write response to temp file and parse from file
+	# This handles edge cases where the shell variable may have lost data
+	# (e.g., null bytes, very long lines, or subshell truncation)
+	local tmpfile
+	tmpfile=$(mktemp "${TMPDIR:-/tmp}/ai-parse-XXXXXX")
+	printf '%s' "$response" >"$tmpfile"
+
+	# Try file-based extraction of last ```json block
+	json_block=$(awk '
+		/^```json/ { capture=1; block=""; next }
+		/^```$/ && capture { capture=0; last_block=block; next }
+		capture { block = block (block ? "\n" : "") $0 }
+		END { if (capture && block) print block; else if (last_block) print last_block }
+	' "$tmpfile")
+	rm -f "$tmpfile"
+
+	if [[ -n "$json_block" ]]; then
+		parsed=$(printf '%s' "$json_block" | jq '.' 2>/dev/null)
+		if [[ $? -eq 0 && -n "$parsed" ]]; then
+			printf '%s' "$parsed"
+			return 0
+		fi
+	fi
-	local tmpfile
-	tmpfile=$(mktemp "${TMPDIR:-/tmp}/ai-parse-XXXXXX")
-	printf '%s' "$response" >"$tmpfile"
-
-	# Try file-based extraction of last ```json block
-	json_block=$(awk '
-		/^```json/ { capture=1; block=""; next }
-		/^```$/ && capture { capture=0; last_block=block; next }
-		capture { block = block (block ? "\n" : "") $0 }
-		END { if (capture && block) print block; else if (last_block) print last_block }
-	' "$tmpfile")
-	rm -f "$tmpfile"
-
-	if [[ -n "$json_block" ]]; then
-		parsed=$(printf '%s' "$json_block" | jq '.' 2>/dev/null)
-		if [[ $? -eq 0 && -n "$parsed" ]]; then
-			printf '%s' "$parsed"
-			return 0
-		fi
-	fi
+if parsed=$( (
+    local tmpfile
+    tmpfile=$(mktemp "${TMPDIR:-/tmp}/ai-parse-XXXXXX")
+    trap 'rm -f "$tmpfile"' EXIT
+    printf '%s' "$response" >"$tmpfile"
+    awk '
+        /^```json/ { capture=1; block=""; next }
+        /^```$/ && capture { capture=0; last_block=block; next }
+        capture { block = block (block ? "\n" : "") $0 }
+        END { if (capture && block) print block; else if (last_block) print last_block }
+    ' "$tmpfile" | jq '.' 2>/dev/null
+) ); then
+    if [[ -n "$parsed" ]]; then
+        printf '%s' "$parsed"
+        return 0
+    fi
+fi
-	local tmpfile
-	tmpfile=$(mktemp "${TMPDIR:-/tmp}/ai-parse-XXXXXX")
-	printf '%s' "$response" >"$tmpfile"
-
-	# Try file-based extraction of last ```json block
-	json_block=$(awk '
-		/^```json/ { capture=1; block=""; next }
-		/^```$/ && capture { capture=0; last_block=block; next }
-		capture { block = block (block ? "\n" : "") $0 }
-		END { if (capture && block) print block; else if (last_block) print last_block }
-	' "$tmpfile")
-	rm -f "$tmpfile"
-
-	if [[ -n "$json_block" ]]; then
-		parsed=$(printf '%s' "$json_block" | jq '.' 2>/dev/null)
-		if [[ $? -eq 0 && -n "$parsed" ]]; then
-			printf '%s' "$parsed"
-			return 0
-		fi
-	fi
+if parsed=$( (
+    local tmpfile
+    tmpfile=$(mktemp "${TMPDIR:-/tmp}/ai-parse-XXXXXX")
+    trap 'rm -f "$tmpfile"' EXIT
+    printf '%s' "$response" >"$tmpfile"
+    awk '
+        /^```json/ { capture=1; block=""; next }
+        /^```$/ && capture { capture=0; last_block=block; next }
+        capture { block = block (block ? "\n" : "") $0 }
+        END { if (capture && block) print block; else if (last_block) print last_block }
+    ' "$tmpfile" | jq '.' 2>/dev/null
+) ); then
+    if [[ -n "$parsed" ]]; then
+        printf '%s' "$parsed"
+        return 0
+    fi
+fi
+
 	# Failed to parse
 	echo ""
 	return 0

diff --git a/.agents/scripts/supervisor/pulse.sh b/.agents/scripts/supervisor/pulse.sh
@@ -497,6 +497,65 @@ cmd_pulse() {
 		done <<<"$retrying_tasks"
 	fi
 
+	# Phase 1c: Auto-reap stuck evaluating tasks (self-healing)
+	# Tasks can get stuck in 'evaluating' when the worker dies but evaluation
+	# fails or times out. Phase 1 handles tasks with dead workers that it finds
+	# in the running_tasks query, but tasks can also get stuck if:
+	#   - The evaluation itself crashed (jq error, timeout, etc.)
+	#   - The task was left in evaluating from a previous pulse that was killed
+	# This phase catches any evaluating task older than 10 minutes with no
+	# live worker process, and force-transitions it to failed for retry.
+	local stuck_evaluating
+	stuck_evaluating=$(db -separator '|' "$SUPERVISOR_DB" "
+		SELECT id, updated_at FROM tasks
+		WHERE status = 'evaluating'
+		AND updated_at < strftime('%Y-%m-%dT%H:%M:%SZ', 'now', '-10 minutes')
+		ORDER BY updated_at ASC;
+	" 2>/dev/null || echo "")
+
+	if [[ -n "$stuck_evaluating" ]]; then
+		while IFS='|' read -r stuck_id stuck_updated; do
+			[[ -z "$stuck_id" ]] && continue
+
+			# Double-check: is the worker actually dead?
+			local stuck_pid_file="$SUPERVISOR_DIR/pids/${stuck_id}.pid"
+			local stuck_alive=false
+			if [[ -f "$stuck_pid_file" ]]; then
+				local stuck_pid
+				stuck_pid=$(cat "$stuck_pid_file" 2>/dev/null || echo "")
+				if [[ -n "$stuck_pid" ]] && kill -0 "$stuck_pid" 2>/dev/null; then
+					stuck_alive=true
+				fi
+			fi
+
+			if [[ "$stuck_alive" == "true" ]]; then
+				log_info "  Phase 1c: $stuck_id evaluating since $stuck_updated but worker still alive — skipping"
+				continue
+			fi
+
+			log_warn "  Phase 1c: $stuck_id stuck in evaluating since $stuck_updated (worker dead) — force-transitioning to failed"
+
+			# Check retry count
+			local stuck_retries stuck_max_retries
+			stuck_retries=$(db "$SUPERVISOR_DB" "SELECT retries FROM tasks WHERE id = '$(sql_escape "$stuck_id")';" 2>/dev/null || echo 0)
+			stuck_max_retries=$(db "$SUPERVISOR_DB" "SELECT max_retries FROM tasks WHERE id = '$(sql_escape "$stuck_id")';" 2>/dev/null || echo 3)
+
+			if [[ "$stuck_retries" -lt "$stuck_max_retries" ]]; then
+				# Transition to retrying so it gets re-dispatched
+				cmd_transition "$stuck_id" "retrying" --error "Auto-reaped: stuck in evaluating >10min with dead worker (Phase 1c)" 2>>"$SUPERVISOR_LOG" || true
+				db "$SUPERVISOR_DB" "UPDATE tasks SET retries = retries + 1, updated_at = strftime('%Y-%m-%dT%H:%M:%SZ', 'now') WHERE id = '$(sql_escape "$stuck_id")';" 2>/dev/null || true
+				log_info "  Phase 1c: $stuck_id → retrying (retry $((stuck_retries + 1))/$stuck_max_retries)"
+			else
+				# Max retries exhausted — mark as failed
+				cmd_transition "$stuck_id" "failed" --error "Auto-reaped: stuck in evaluating >10min, max retries exhausted (Phase 1c)" 2>>"$SUPERVISOR_LOG" || true
+				log_warn "  Phase 1c: $stuck_id → failed (max retries exhausted)"
+			fi
+
+			# Clean up PID file
+			cleanup_worker_processes "$stuck_id" 2>>"$SUPERVISOR_LOG" || true
+		done <<<"$stuck_evaluating"
+	fi
+
 	# Phase 2: Dispatch queued tasks up to concurrency limit
 
 	if [[ -n "$batch_id" ]]; then
@@ -555,6 +614,59 @@ cmd_pulse() {
 		fi
 	fi
 
+	# Phase 2b: Dispatch stall detection and auto-recovery
+	# If there are queued tasks but nothing was dispatched and nothing is running,
+	# the pipeline is stalled. Common causes:
+	#   - No active batch (auto-pickup creates batches, but may have failed)
+	#   - All tasks stuck in non-dispatchable states (evaluating, blocked)
+	#   - Provider unavailable for extended period
+	#   - Concurrency limit misconfigured to 0
+	if [[ "$dispatched_count" -eq 0 ]]; then
+		local queued_count running_count
+		queued_count=$(db "$SUPERVISOR_DB" "SELECT COUNT(*) FROM tasks WHERE status = 'queued';" 2>/dev/null || echo 0)
+		running_count=$(db "$SUPERVISOR_DB" "SELECT COUNT(*) FROM tasks WHERE status IN ('running', 'dispatched');" 2>/dev/null || echo 0)
+
+		if [[ "$queued_count" -gt 0 && "$running_count" -eq 0 ]]; then
+			log_warn "Phase 2b: Dispatch stall detected — $queued_count queued, 0 running, 0 dispatched this pulse"
+
+			# Diagnose: is there an active batch?
+			local active_batch_count
+			active_batch_count=$(db "$SUPERVISOR_DB" "
+				SELECT COUNT(*) FROM batches
+				WHERE status IN ('active', 'running');" 2>/dev/null || echo 0)
+
+			if [[ "$active_batch_count" -eq 0 ]]; then
+				log_warn "Phase 2b: No active batch found — queued tasks have no batch to dispatch from"
+				# Auto-recovery: trigger auto-pickup to create a batch
+				# This handles the case where tasks were added to the DB but no batch was created
+				local stall_repos
+				stall_repos=$(db "$SUPERVISOR_DB" "SELECT DISTINCT repo FROM tasks WHERE status = 'queued';" 2>/dev/null || echo "")
+				if [[ -n "$stall_repos" ]]; then
+					while IFS= read -r stall_repo; do
+						[[ -z "$stall_repo" ]] && continue
+						log_info "Phase 2b: Re-running auto-pickup for $stall_repo to create batch"
+						cmd_auto_pickup --repo "$stall_repo" 2>>"$SUPERVISOR_LOG" || true
+					done <<<"$stall_repos"
+				fi
+			else
+				# Batch exists but dispatch failed — log diagnostic info
+				local batch_info
+				batch_info=$(db -separator '|' "$SUPERVISOR_DB" "
+					SELECT id, concurrency, status FROM batches
+					WHERE status IN ('active', 'running')
+					LIMIT 1;" 2>/dev/null || echo "")
+				log_warn "Phase 2b: Active batch exists ($batch_info) but dispatch produced 0 — check concurrency limits and provider health"
+			fi
+
+			# Track stall count in state_log for the AI self-reflection to pick up
+			db "$SUPERVISOR_DB" "
+				INSERT INTO state_log (task_id, from_state, to_state, reason)
+				VALUES ('supervisor', 'dispatch', 'stalled',
+						'$(sql_escape "Dispatch stall: $queued_count queued, 0 running, 0 dispatched. Active batches: $active_batch_count")');
+			" 2>/dev/null || true
+		fi
+	fi
+
 	# Phase 2.5: Contest mode — check running contests for completion (t1011)
 	# If any contest has all entries complete, evaluate cross-rankings and apply winner
 	local contest_helper="${SCRIPT_DIR}/contest-helper.sh"