Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 17 additions & 14 deletions .agents/scripts/contributor-activity-helper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1023,25 +1023,23 @@ print(','.join(sorted(logins)))
# Query GitHub Search API for each login.
# Uses gh api with search/issues endpoint — returns total_count without pagination.
# Rate limit: 30 requests/min for search API. With 4 queries per user,
# we can handle ~7 users per minute. For larger teams, the function
# checks remaining rate limit and sleeps until reset if needed.
# we can handle ~7 users per minute. If budget is exhausted, bail out
# with partial results instead of blocking (t1429).
local results_json="["
local first=true
local _ps_partial=false
local IFS=','
for login in $logins_csv; do
# Check search API rate limit before each batch of 4 queries per user
local remaining
remaining=$(gh api rate_limit --jq '.resources.search.remaining' 2>/dev/null) || remaining=30
if [[ "$remaining" -lt 5 ]]; then
local reset_at
reset_at=$(gh api rate_limit --jq '.resources.search.reset' 2>/dev/null) || reset_at=0
local now_epoch
now_epoch=$(date +%s)
local wait_secs=$((reset_at - now_epoch + 1))
if [[ "$wait_secs" -gt 0 && "$wait_secs" -lt 120 ]]; then
echo "Rate limit low (${remaining} remaining), waiting ${wait_secs}s..." >&2
sleep "$wait_secs"
fi
# t1429: bail out with partial results instead of sleeping.
# The old code slept until reset, creating an infinite blocking
# loop when multiple users × repos exhausted the 30 req/min budget.
echo "Rate limit exhausted (${remaining} remaining), returning partial results" >&2
_ps_partial=true
break
fi

# Issues created by this user in this repo since the date
Expand Down Expand Up @@ -1070,13 +1068,14 @@ print(','.join(sorted(logins)))
unset IFS
results_json+="]"

# Format output
# Format output (pass partial flag so callers can detect truncated data)
echo "$results_json" | python3 -c "
import sys
import json

format_type = sys.argv[1]
period_name = sys.argv[2]
is_partial = sys.argv[3] == 'true'

data = json.load(sys.stdin)

Expand All @@ -1086,7 +1085,8 @@ for d in data:
data.sort(key=lambda x: x['total_output'], reverse=True)

if format_type == 'json':
print(json.dumps(data, indent=2))
result = {'data': data, 'partial': is_partial}
print(json.dumps(result, indent=2))
else:
if not data:
print(f'_No GitHub activity for the last {period_name}._')
Expand All @@ -1097,7 +1097,10 @@ else:
for d in data:
pct = round(d['total_output'] / grand_total * 100, 1)
print(f'| {d[\"login\"]} | {d[\"issues_created\"]} | {d[\"prs_created\"]} | {d[\"prs_merged\"]} | {d[\"commented_on\"]} | {pct}% |')
" "$format" "$period"
if is_partial:
print()
print('_Partial results — GitHub Search API rate limit exhausted._')
" "$format" "$period" "$_ps_partial"

return 0
}
Expand Down
28 changes: 11 additions & 17 deletions .agents/scripts/pulse-wrapper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3259,19 +3259,18 @@ prefetch_contribution_watch() {
#######################################
# Main
#
# Execution order (GH#2958):
# Execution order (t1429):
# 1. Gate checks (consent, dedup)
# 2. Cleanup (orphans, worktrees)
# 3. Pre-pulse housekeeping (quality sweep, health issues) — these are
# shell-level operations that run quickly and don't need the LLM.
# Running them BEFORE the pulse ensures the LLM session gets maximum
# time for its actual job (triage, dispatch, PR review).
# 4. Prefetch state (parallel gh API calls)
# 5. Run pulse (LLM session — the main event)
#
# Previously, quality sweep and health issues ran AFTER the pulse. This
# meant the pulse's 30-min timeout was shared with these operations,
# and the LLM session was killed before completing its work.
# 2. Cleanup (orphans, worktrees, stashes)
# 3. Prefetch state (parallel gh API calls)
# 4. Run pulse (LLM session — dispatch workers, merge PRs)
#
# Statistics (quality sweep, health issues, person-stats) run in a
# SEPARATE process — stats-wrapper.sh — on its own cron schedule.
# They must never share a process with the pulse because they depend
# on GitHub Search API (30 req/min limit). When budget is exhausted,
# contributor-activity-helper.sh bails out with partial results, but
# even the API calls themselves add latency that delays dispatch.
#######################################
main() {
if ! check_session_gate; then
Expand All @@ -3293,11 +3292,6 @@ main() {
calculate_priority_allocations
check_session_count >/dev/null

# Run housekeeping BEFORE the pulse — these are shell-level operations
# that don't need the LLM and shouldn't eat into pulse time (GH#2958).
run_daily_quality_sweep
update_health_issues

# Contribution watch: lightweight scan of external issues/PRs (t1419).
# Deterministic — only checks timestamps/authorship, never processes
# comment bodies. Output appended to STATE_FILE for the pulse agent.
Expand Down
123 changes: 123 additions & 0 deletions .agents/scripts/stats-wrapper.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#!/usr/bin/env bash
# stats-wrapper.sh - Separate process for statistics and health updates
#
# Runs quality sweep, health issue updates, and person-stats independently
# of the supervisor pulse. These operations depend on GitHub Search API
# (30 req/min limit) and can block for extended periods when rate-limited.
# Running them in-process with the pulse prevented dispatch and merge work
# from ever executing. See t1429 for the full root cause analysis.
#
# Called by cron/launchd every 15 minutes. Has its own PID dedup and hard timeout.

# Strict mode: abort on errors, unset variables, and failed pipeline stages.
set -euo pipefail

#######################################
# PATH normalisation — same as pulse-wrapper.sh
#######################################
# Prepend common binary dirs so cron/launchd (which start with a minimal
# PATH) can still find gh, python3, etc. Homebrew path covers Apple Silicon.
export PATH="/bin:/usr/bin:/usr/local/bin:/opt/homebrew/bin:${PATH}"

# Use ${BASH_SOURCE[0]:-$0} for shell portability — BASH_SOURCE is undefined
# in zsh (MCP shell environment). See GH#3931.
# The trailing `|| return 2>/dev/null || exit` bails out whether this file is
# being sourced (return) or executed (exit) when the cd/dirname fails.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" || return 2>/dev/null || exit
# Shared constants and _validate_int/_kill_tree helpers come from these two
# sibling files — this script cannot run standalone without them.
source "${SCRIPT_DIR}/shared-constants.sh"
source "${SCRIPT_DIR}/worker-lifecycle-common.sh"

#######################################
# Configuration
#######################################
STATS_TIMEOUT="${STATS_TIMEOUT:-600}" # 10 min hard ceiling
# Clamp/validate the override; presumably (name, value, default, minimum) —
# defined in a sourced helper, signature not visible here. TODO confirm.
STATS_TIMEOUT=$(_validate_int STATS_TIMEOUT "$STATS_TIMEOUT" 600 60)

# PID file for dedup ("PID EPOCH" format) and append-only progress log.
STATS_PIDFILE="${HOME}/.aidevops/logs/stats.pid"
STATS_LOGFILE="${HOME}/.aidevops/logs/stats.log"

# Both files live in the same directory; create it on first run.
mkdir -p "$(dirname "$STATS_PIDFILE")"

#######################################
# PID-based dedup — same pattern as pulse-wrapper check_dedup()
#
# Decides whether it is safe to start a new stats run. The PID file
# stores "PID EPOCH" (process ID plus start timestamp). A run older
# than STATS_TIMEOUT is considered stale: it is killed and its PID
# file removed so the new run can proceed.
#
# Globals:
#   STATS_PIDFILE (read) - path to the "PID EPOCH" marker file
#   STATS_TIMEOUT (read) - max age in seconds before a run is stale
#   STATS_LOGFILE (read) - append-only progress log
# Returns:
#   0 when a new run may start, 1 when one is already running
#######################################
check_stats_dedup() {
  if [[ ! -f "$STATS_PIDFILE" ]]; then
    return 0
  fi

  # PID file format: "PID EPOCH" (PID + start timestamp)
  local old_pid old_epoch
  read -r old_pid old_epoch <"$STATS_PIDFILE" 2>/dev/null || {
    rm -f "$STATS_PIDFILE"
    return 0
  }

  if [[ -z "$old_pid" ]]; then
    rm -f "$STATS_PIDFILE"
    return 0
  fi

  # Previous process already gone — clean up its marker.
  if ! ps -p "$old_pid" >/dev/null 2>&1; then
    rm -f "$STATS_PIDFILE"
    return 0
  fi

  # Check age using stored epoch (portable — no date -d / _get_process_age).
  # Guard against a corrupt/non-numeric epoch field: a garbage token inside
  # $(( )) is an arithmetic syntax error (or "unbound variable" under -u),
  # which would abort the whole script under `set -euo pipefail`. Fall back
  # to 0, which treats the run as maximally stale — the safe direction.
  if [[ ! "$old_epoch" =~ ^[0-9]+$ ]]; then
    old_epoch=0
  fi
  local now
  now=$(date +%s)
  local elapsed=$((now - old_epoch))

  if [[ "$elapsed" -gt "$STATS_TIMEOUT" ]]; then
    echo "[stats-wrapper] Killing stale stats process $old_pid (${elapsed}s)" >>"$STATS_LOGFILE"
    # Graceful kill first; give TERM a moment, then escalate.
    _kill_tree "$old_pid" || true
    sleep 2
    if kill -0 "$old_pid" 2>/dev/null; then
      _force_kill_tree "$old_pid" || true
    fi
    rm -f "$STATS_PIDFILE"
    return 0
  fi

  echo "[stats-wrapper] Stats already running (PID $old_pid, ${elapsed}s). Skipping." >>"$STATS_LOGFILE"
  return 1
}

#######################################
# Main
#
# Entry point: gate on the PID-file dedup, record our own marker,
# then run the statistics jobs borrowed from pulse-wrapper.sh.
# Progress is appended to STATS_LOGFILE; each job is best-effort.
#######################################
main() {
  # Another stats run in flight? (A stale one is reaped by the check.)
  check_stats_dedup || return 0

  # Record "PID EPOCH" so the next invocation can dedup against us,
  # and make sure the marker disappears on every exit path.
  printf '%s %s\n' "$$" "$(date +%s)" >"$STATS_PIDFILE"
  trap 'rm -f "$STATS_PIDFILE"' EXIT

  printf '%s\n' "[stats-wrapper] Starting at $(date -u +%Y-%m-%dT%H:%M:%SZ)" >>"$STATS_LOGFILE"

  # Source pulse-wrapper to reuse its functions
  # (update_health_issues, run_daily_quality_sweep, etc.)
  # pulse-wrapper.sh has a source guard — main() won't execute on source.
  # shellcheck source=pulse-wrapper.sh
  if ! source "${SCRIPT_DIR}/pulse-wrapper.sh"; then
    printf '%s\n' "[stats-wrapper] Failed to source pulse-wrapper.sh" >>"$STATS_LOGFILE"
    return 1
  fi

  # Best-effort: a failure in one stats job must not abort the other.
  run_daily_quality_sweep || true
  update_health_issues || true

  printf '%s\n' "[stats-wrapper] Finished at $(date -u +%Y-%m-%dT%H:%M:%SZ)" >>"$STATS_LOGFILE"
  return 0
}

# Shell-portable source detection — same as pulse-wrapper (GH#3931).
# Returns 0 when this file is being sourced, 1 when executed directly.
# Bash exposes BASH_SOURCE; zsh exposes ZSH_EVAL_CONTEXT; any other
# shell is assumed to be executing the script directly.
_stats_is_sourced() {
  # Bash: when sourced, BASH_SOURCE[0] (this file) differs from $0 (caller).
  if [[ -n "${BASH_SOURCE[0]:-}" ]]; then
    [[ "${BASH_SOURCE[0]}" != "${0}" ]]
    return $?
  fi
  # Zsh: the eval context contains a "file" component while sourcing.
  if [[ -n "${ZSH_EVAL_CONTEXT:-}" ]]; then
    case ":${ZSH_EVAL_CONTEXT}:" in
      *":file:"*) return 0 ;;
      *) return 1 ;;
    esac
  fi
  return 1
}
if ! _stats_is_sourced; then
main "$@"
fi
85 changes: 85 additions & 0 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,91 @@ PLIST
fi
fi

# Enable stats-wrapper — runs quality sweep and health issue updates
# separately from the pulse (t1429). Only installed when the supervisor
# pulse is enabled (stats are useless without it).
local stats_script="$HOME/.aidevops/agents/scripts/stats-wrapper.sh"
local stats_label="com.aidevops.aidevops-stats-wrapper"
if [[ -x "$stats_script" ]] && [[ "$_pulse_lower" == "true" ]]; then
local _stats_installed=false
if _launchd_has_agent "$stats_label"; then
_stats_installed=true
elif crontab -l 2>/dev/null | grep -qF "aidevops: stats-wrapper"; then
_stats_installed=true
fi
if [[ "$_stats_installed" == "false" ]]; then
if [[ "$(uname -s)" == "Darwin" ]]; then
local stats_plist="$HOME/Library/LaunchAgents/${stats_label}.plist"
local _xml_stats_script _xml_stats_home _xml_stats_path
_xml_stats_script=$(_xml_escape "$stats_script")
_xml_stats_home=$(_xml_escape "$HOME")
_xml_stats_path=$(_xml_escape "$PATH")
cat >"$stats_plist" <<PLIST
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>${stats_label}</string>
<key>ProgramArguments</key>
<array>
<string>/bin/bash</string>
<string>${_xml_stats_script}</string>
</array>
<key>StartInterval</key>
<integer>900</integer>
<key>StandardOutPath</key>
<string>${_xml_stats_home}/.aidevops/logs/stats.log</string>
<key>StandardErrorPath</key>
<string>${_xml_stats_home}/.aidevops/logs/stats.log</string>
<key>EnvironmentVariables</key>
<dict>
<key>PATH</key>
<string>${_xml_stats_path}</string>
<key>HOME</key>
<string>${_xml_stats_home}</string>
</dict>
<key>RunAtLoad</key>
<true/>
<key>KeepAlive</key>
<false/>
</dict>
</plist>
PLIST
if launchctl load "$stats_plist"; then
print_info "Stats wrapper enabled (launchd, every 15 min)"
else
print_warning "Failed to load stats wrapper LaunchAgent"
fi
else
local _cron_stats_script
_cron_stats_script=$(_cron_escape "$stats_script")
(
crontab -l 2>/dev/null | grep -v 'aidevops: stats-wrapper'
echo "*/15 * * * * PATH=\"/usr/local/bin:/usr/bin:/bin\" /bin/bash ${_cron_stats_script} >> \"\$HOME/.aidevops/logs/stats.log\" 2>&1 # aidevops: stats-wrapper"
) | crontab - || true
if crontab -l 2>/dev/null | grep -qF "aidevops: stats-wrapper"; then
print_info "Stats wrapper enabled (cron, every 15 min)"
fi
fi
fi
elif [[ "$_pulse_lower" == "false" ]]; then
# Remove stats scheduler if pulse is disabled
if [[ "$(uname -s)" == "Darwin" ]]; then
local stats_plist="$HOME/Library/LaunchAgents/${stats_label}.plist"
if _launchd_has_agent "$stats_label"; then
launchctl unload "$stats_plist" || true
rm -f "$stats_plist"
print_info "Stats wrapper disabled (launchd agent removed — pulse is off)"
fi
else
if crontab -l 2>/dev/null | grep -qF "aidevops: stats-wrapper"; then
crontab -l 2>/dev/null | grep -v 'aidevops: stats-wrapper' | crontab - || true
print_info "Stats wrapper disabled (cron entry removed — pulse is off)"
fi
fi
fi

# Enable repo-sync scheduler if not already installed
# Keeps local git repos up to date with daily ff-only pulls
# Respects config: aidevops config set orchestration.repo_sync false
Expand Down
Loading
Loading