diff --git a/.agents/scripts/pulse-wrapper.sh b/.agents/scripts/pulse-wrapper.sh index 85021c2ec..22fb45329 100755 --- a/.agents/scripts/pulse-wrapper.sh +++ b/.agents/scripts/pulse-wrapper.sh @@ -225,16 +225,28 @@ check_dedup() { # Underfilled stale recovery: if the pulse process has been running long # enough and worker pool is below target, recycle now instead of waiting - # for the full stale threshold. This prevents prolonged underfill windows - # when a pulse session is alive but not generating new workers. - local max_workers active_workers + # for the full stale threshold. Adapt timeout by underfill severity to + # recover capacity faster during deep underfill while keeping tolerance + # for minor underfill blips. + local max_workers active_workers deficit_pct adaptive_timeout max_workers=$(get_max_workers_target) active_workers=$(count_active_workers) [[ "$max_workers" =~ ^[0-9]+$ ]] || max_workers=1 [[ "$active_workers" =~ ^[0-9]+$ ]] || active_workers=0 + deficit_pct=0 + adaptive_timeout="$PULSE_UNDERFILLED_STALE_RECOVERY_TIMEOUT" + + if [[ "$active_workers" -lt "$max_workers" ]]; then + deficit_pct=$(((max_workers - active_workers) * 100 / max_workers)) + if [[ "$deficit_pct" -ge 50 ]]; then + adaptive_timeout=300 + elif [[ "$deficit_pct" -ge 25 ]]; then + adaptive_timeout=450 + fi + fi - if [[ "$elapsed_seconds" -gt "$PULSE_UNDERFILLED_STALE_RECOVERY_TIMEOUT" && "$active_workers" -lt "$max_workers" ]]; then - echo "[pulse-wrapper] Recycling stale pulse process $old_pid early (running ${elapsed_seconds}s, underfilled ${active_workers}/${max_workers}, threshold ${PULSE_UNDERFILLED_STALE_RECOVERY_TIMEOUT}s)" >>"$LOGFILE" + if [[ "$elapsed_seconds" -gt "$adaptive_timeout" && "$active_workers" -lt "$max_workers" ]]; then + echo "[pulse-wrapper] Recycling stale pulse process $old_pid early (running ${elapsed_seconds}s, underfilled ${active_workers}/${max_workers} [${deficit_pct}%], threshold ${adaptive_timeout}s)" >>"$LOGFILE" _kill_tree "$old_pid" || true sleep 2 if kill -0 "$old_pid" 2>/dev/null; then