diff --git a/.agents/scripts/pulse-wrapper.sh b/.agents/scripts/pulse-wrapper.sh
index 44de28eef..71c2949b8 100755
--- a/.agents/scripts/pulse-wrapper.sh
+++ b/.agents/scripts/pulse-wrapper.sh
@@ -33,25 +33,64 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || exit
 #######################################
 # Configuration
 #######################################
-PULSE_STALE_THRESHOLD="${PULSE_STALE_THRESHOLD:-1800}" # 30 min = definitely stuck (opencode idle bug)
+PULSE_STALE_THRESHOLD="${PULSE_STALE_THRESHOLD:-1800}"    # 30 min = definitely stuck (opencode idle bug)
+ORPHAN_MAX_AGE="${ORPHAN_MAX_AGE:-7200}"                  # 2 hours — kill orphans older than this
+RAM_PER_WORKER_MB="${RAM_PER_WORKER_MB:-1024}"            # 1 GB per worker
+RAM_RESERVE_MB="${RAM_RESERVE_MB:-8192}"                  # 8 GB reserved for OS + user apps
+MAX_WORKERS_CAP="${MAX_WORKERS_CAP:-8}"                   # Hard ceiling regardless of RAM
+QUALITY_SWEEP_INTERVAL="${QUALITY_SWEEP_INTERVAL:-86400}" # 24 hours between sweeps
+
+# _validate_int NAME VALUE DEFAULT [MIN=0]: print VALUE as canonical base-10, or DEFAULT (with a
+# warning on stderr) when VALUE is not a plain decimal integer of at most 18 significant digits or
+# is below MIN. Needed because $(( )) evaluates strings like "a[$(cmd)]" and would run cmd.
+_validate_int() {
+	local name="$1" value="$2" default="$3" min="${4:-0}"
+	if ! [[ "$value" =~ ^0*[0-9]{1,18}$ ]]; then
+		echo "[pulse-wrapper] Invalid ${name}: ${value} — using default ${default}" >&2
+		printf '%s' "$default"
+		return 0
+	fi
+	# Canonicalize to base-10 so leading zeros are never read as octal ("08" -> 8, "01024" -> 1024).
+	# The 18-digit cap above keeps 10#$value below 2^63, so the expansion cannot wrap around.
+	local canonical
+	canonical=$((10#$value))
+	# Enforce minimum to prevent divide-by-zero for divisor-backed settings
+	if ((canonical < min)); then
+		echo "[pulse-wrapper] ${name}=${canonical} below minimum ${min} — using default ${default}" >&2
+		printf '%s' "$default"
+		return 0
+	fi
+	printf '%s' "$canonical"
+	return 0
+}
+PULSE_STALE_THRESHOLD=$(_validate_int PULSE_STALE_THRESHOLD "$PULSE_STALE_THRESHOLD" 1800)
+ORPHAN_MAX_AGE=$(_validate_int ORPHAN_MAX_AGE "$ORPHAN_MAX_AGE" 7200)
+RAM_PER_WORKER_MB=$(_validate_int RAM_PER_WORKER_MB "$RAM_PER_WORKER_MB" 1024 1)
+RAM_RESERVE_MB=$(_validate_int RAM_RESERVE_MB "$RAM_RESERVE_MB" 8192)
+MAX_WORKERS_CAP=$(_validate_int MAX_WORKERS_CAP "$MAX_WORKERS_CAP" 8)
+QUALITY_SWEEP_INTERVAL=$(_validate_int QUALITY_SWEEP_INTERVAL "$QUALITY_SWEEP_INTERVAL" 86400)
+
+# Sanitise untrusted strings before embedding in GitHub markdown comments.
+# Strips every "@" (so mentions cannot trigger notifications) and every backtick
+# (so the text cannot open or close markdown code spans/fences). Used for API
+# response data that gets posted as issue/PR comments. Prints the result on stdout.
+_sanitize_markdown() {
+	local input="$1"
+	# Remove @ mentions to prevent notification spam
+	input="${input//@/}"
+	# Remove backtick sequences that could break markdown fencing
+	input="${input//\`/}"
+	printf '%s' "$input"
+	return 0
+}
 
-# Validate numeric configuration
-if ! 
[[ "$PULSE_STALE_THRESHOLD" =~ ^[0-9]+$ ]]; then - echo "[pulse-wrapper] Invalid PULSE_STALE_THRESHOLD: $PULSE_STALE_THRESHOLD — using default 1800" >&2 - PULSE_STALE_THRESHOLD=1800 -fi PIDFILE="${HOME}/.aidevops/logs/pulse.pid" LOGFILE="${HOME}/.aidevops/logs/pulse.log" OPENCODE_BIN="${OPENCODE_BIN:-/opt/homebrew/bin/opencode}" PULSE_DIR="${PULSE_DIR:-${HOME}/Git/aidevops}" PULSE_MODEL="${PULSE_MODEL:-anthropic/claude-sonnet-4-6}" -ORPHAN_MAX_AGE="${ORPHAN_MAX_AGE:-7200}" # 2 hours — kill orphans older than this -RAM_PER_WORKER_MB="${RAM_PER_WORKER_MB:-1024}" # 1 GB per worker -RAM_RESERVE_MB="${RAM_RESERVE_MB:-8192}" # 8 GB reserved for OS + user apps -MAX_WORKERS_CAP="${MAX_WORKERS_CAP:-8}" # Hard ceiling regardless of RAM REPOS_JSON="${REPOS_JSON:-${HOME}/.config/aidevops/repos.json}" STATE_FILE="${HOME}/.aidevops/logs/pulse-state.txt" -QUALITY_SWEEP_INTERVAL="${QUALITY_SWEEP_INTERVAL:-86400}" # 24 hours between sweeps QUALITY_SWEEP_LAST_RUN="${HOME}/.aidevops/logs/quality-sweep-last-run" QUALITY_SWEEP_STATE_DIR="${HOME}/.aidevops/logs/quality-sweep-state" CODERABBIT_ISSUE_SPIKE="${CODERABBIT_ISSUE_SPIKE:-10}" # trigger active review when issues increase by this many @@ -177,6 +216,12 @@ _get_process_age() { seconds="$etime" fi + # Validate components are numeric before arithmetic expansion + [[ "$days" =~ ^[0-9]+$ ]] || days=0 + [[ "$hours" =~ ^[0-9]+$ ]] || hours=0 + [[ "$minutes" =~ ^[0-9]+$ ]] || minutes=0 + [[ "$seconds" =~ ^[0-9]+$ ]] || seconds=0 + # Remove leading zeros to avoid octal interpretation days=$((10#${days})) hours=$((10#${hours})) @@ -519,6 +564,8 @@ _compute_struggle_ratio() { local threshold="${STRUGGLE_RATIO_THRESHOLD:-30}" local min_elapsed="${STRUGGLE_MIN_ELAPSED_MINUTES:-30}" + [[ "$threshold" =~ ^[0-9]+$ ]] || threshold=30 + [[ "$min_elapsed" =~ ^[0-9]+$ ]] || min_elapsed=30 local min_elapsed_seconds=$((min_elapsed * 60)) # Extract --dir from command line @@ -1154,6 +1201,10 @@ ${worker_table}" page_size=$(sysctl -n hw.pagesize 
2>/dev/null || echo "16384") vm_free=$(vm_stat 2>/dev/null | awk '/Pages free/ {gsub(/\./,"",$3); print $3}') vm_inactive=$(vm_stat 2>/dev/null | awk '/Pages inactive/ {gsub(/\./,"",$3); print $3}') + # Validate integers before arithmetic expansion + [[ "$page_size" =~ ^[0-9]+$ ]] || page_size=16384 + [[ "$vm_free" =~ ^[0-9]+$ ]] || vm_free=0 + [[ "$vm_inactive" =~ ^[0-9]+$ ]] || vm_inactive=0 if [[ -n "$vm_free" ]]; then local avail_mb=$(((${vm_free:-0} + ${vm_inactive:-0}) * page_size / 1048576)) if [[ "$avail_mb" -lt 1024 ]]; then @@ -1189,7 +1240,10 @@ ${worker_table}" local sys_load_ratio="?" if [[ -n "${sys_load_1m:-}" && "${sys_cpu_cores:-0}" -gt 0 && "${sys_cpu_cores}" != "?" ]]; then - sys_load_ratio=$(awk "BEGIN {printf \"%d\", (${sys_load_1m} / ${sys_cpu_cores}) * 100}" 2>/dev/null || echo "?") + # Validate numeric before passing to awk (prevents awk injection) + if [[ "$sys_load_1m" =~ ^[0-9]+\.?[0-9]*$ ]] && [[ "$sys_cpu_cores" =~ ^[0-9]+$ ]]; then + sys_load_ratio=$(awk "BEGIN {printf \"%d\", (${sys_load_1m} / ${sys_cpu_cores}) * 100}" || echo "?") + fi fi # Worktree count for this repo @@ -1413,7 +1467,12 @@ run_daily_quality_sweep() { # Timestamp guard — run at most once per QUALITY_SWEEP_INTERVAL if [[ -f "$QUALITY_SWEEP_LAST_RUN" ]]; then local last_run - last_run=$(cat "$QUALITY_SWEEP_LAST_RUN" 2>/dev/null || echo "0") + last_run=$(cat "$QUALITY_SWEEP_LAST_RUN" || echo "0") + # Validate integer before arithmetic expansion (prevents command injection) + if ! 
[[ "$last_run" =~ ^[0-9]+$ ]]; then + echo "[pulse-wrapper] Corrupt sweep timestamp '${last_run}' — resetting" >>"$LOGFILE" + last_run=0 + fi local now now=$(date +%s) local elapsed=$((now - last_run)) @@ -1716,16 +1775,35 @@ _No smells detected or qlty analysis returned empty._ org_key=$(grep '^sonar.organization=' "${repo_path}/sonar-project.properties" 2>/dev/null | cut -d= -f2) if [[ -n "$project_key" && -n "$org_key" ]]; then + # URL-encode project_key to prevent injection via crafted sonar-project.properties + local encoded_project_key + encoded_project_key=$(python3 -c 'import urllib.parse,sys; print(urllib.parse.quote(sys.argv[1]))' "$project_key" 2>/dev/null) || encoded_project_key="" + if [[ -z "$encoded_project_key" ]]; then + echo "[pulse-wrapper] Failed to URL-encode project_key — skipping SonarCloud" >&2 + fi + # SonarCloud public API — quality gate status - local sonar_status - sonar_status=$(curl -s "https://sonarcloud.io/api/qualitygates/project_status?projectKey=${project_key}" 2>/dev/null || echo "") + local sonar_status="" + if [[ -n "$encoded_project_key" ]]; then + sonar_status=$(curl -sS --fail --connect-timeout 5 --max-time 20 \ + "https://sonarcloud.io/api/qualitygates/project_status?projectKey=${encoded_project_key}" || echo "") + fi if [[ -n "$sonar_status" ]] && echo "$sonar_status" | jq -e '.projectStatus' &>/dev/null; then - local gate_status - gate_status=$(echo "$sonar_status" | jq -r '.projectStatus.status // "UNKNOWN"') + # Single jq pass: extract gate status and conditions together + local gate_data + gate_data=$(echo "$sonar_status" | jq -r ' + (.projectStatus.status // "UNKNOWN") as $status | + [.projectStatus.conditions[]? 
| "- **\(.metricKey)**: \(.actualValue) (\(.status))"] | join("\n") as $conds | + "\($status)|\($conds)" + ') || gate_data="UNKNOWN|" + local gate_status="${gate_data%%|*}" + local conditions="${gate_data#*|}" + # Sanitise API data before embedding in markdown comment + gate_status=$(_sanitize_markdown "$gate_status") + conditions=$(_sanitize_markdown "$conditions") + # Feed sweep state for CodeRabbit conditional trigger (section 5) sweep_gate_status="$gate_status" - local conditions - conditions=$(echo "$sonar_status" | jq -r '.projectStatus.conditions[]? | "- **\(.metricKey)**: \(.actualValue) (\(.status))"' 2>/dev/null || echo "") sonar_section="### SonarCloud Quality Gate @@ -1735,22 +1813,41 @@ ${conditions} fi # Fetch open issues summary - local sonar_issues - sonar_issues=$(curl -s "https://sonarcloud.io/api/issues/search?componentKeys=${project_key}&statuses=OPEN,CONFIRMED,REOPENED&ps=1&facets=severities,types" 2>/dev/null || echo "") + local sonar_issues="" + if [[ -n "$encoded_project_key" ]]; then + sonar_issues=$(curl -sS --fail --connect-timeout 5 --max-time 20 \ + "https://sonarcloud.io/api/issues/search?componentKeys=${encoded_project_key}&statuses=OPEN,CONFIRMED,REOPENED&ps=1&facets=severities,types" || echo "") + fi if [[ -n "$sonar_issues" ]] && echo "$sonar_issues" | jq -e '.total' &>/dev/null; then - local total_issues - total_issues=$(echo "$sonar_issues" | jq -r '.total // 0') + # Single jq pass: extract total, high/critical count, severity breakdown, and type breakdown + local issues_data + issues_data=$(echo "$sonar_issues" | jq -r ' + (.total // 0) as $total | + ([.facets[]? | select(.property == "severities") | .values[]? | select(.val == "MAJOR" or .val == "CRITICAL" or .val == "BLOCKER") | .count] | add // 0) as $hc | + ([.facets[]? | select(.property == "severities") | .values[]? | " - \(.val): \(.count)"] | join("\n")) as $sev | + ([.facets[]? | select(.property == "types") | .values[]? 
| " - \(.val): \(.count)"] | join("\n")) as $typ | + "\($total)|\($hc)|\($sev)|\($typ)" + ') || issues_data="0|0||" + local total_issues="${issues_data%%|*}" + local remainder="${issues_data#*|}" + local high_critical_count="${remainder%%|*}" + remainder="${remainder#*|}" + local severity_breakdown="${remainder%%|*}" + local type_breakdown="${remainder#*|}" + # Validate numeric fields before any arithmetic use + if ! [[ "$total_issues" =~ ^[0-9]+$ ]]; then + total_issues=0 + fi + if ! [[ "$high_critical_count" =~ ^[0-9]+$ ]]; then + high_critical_count=0 + fi + # Feed sweep state for CodeRabbit conditional trigger (section 5) sweep_total_issues="$total_issues" - # Extract high+critical count for CodeRabbit trigger logic - local high_count critical_count - high_count=$(echo "$sonar_issues" | jq -r '.facets[]? | select(.property == "severities") | .values[]? | select(.val == "MAJOR" or .val == "CRITICAL" or .val == "BLOCKER") | .count' 2>/dev/null | awk '{s+=$1} END {print s+0}') - critical_count="${high_count:-0}" - sweep_high_critical="$critical_count" - local severity_breakdown - severity_breakdown=$(echo "$sonar_issues" | jq -r '.facets[]? | select(.property == "severities") | .values[]? | " - \(.val): \(.count)"' 2>/dev/null || echo "") - local type_breakdown - type_breakdown=$(echo "$sonar_issues" | jq -r '.facets[]? | select(.property == "types") | .values[]? 
| " - \(.val): \(.count)"' 2>/dev/null || echo "") + sweep_high_critical="$high_critical_count" + # Sanitise API data before embedding in markdown comment + severity_breakdown=$(_sanitize_markdown "$severity_breakdown") + type_breakdown=$(_sanitize_markdown "$type_breakdown") sonar_section="${sonar_section} - **Open issues**: ${total_issues} @@ -1781,6 +1878,7 @@ ${type_breakdown} if [[ -n "$codacy_response" ]] && echo "$codacy_response" | jq -e '.pagination' &>/dev/null; then local codacy_total codacy_total=$(echo "$codacy_response" | jq -r '.pagination.total // 0') + [[ "$codacy_total" =~ ^[0-9]+$ ]] || codacy_total=0 codacy_section="### Codacy - **Open issues**: ${codacy_total} @@ -1801,6 +1899,10 @@ ${type_breakdown} prev_state=$(_load_sweep_state "$repo_slug") local prev_gate prev_issues prev_high_critical IFS='|' read -r prev_gate prev_issues prev_high_critical <<<"$prev_state" + # Validate numeric fields from state file before arithmetic — corrupted or + # missing values would cause $(( )) to fail or produce nonsense deltas. 
+ [[ "$prev_issues" =~ ^[0-9]+$ ]] || prev_issues=0 + [[ "$prev_high_critical" =~ ^[0-9]+$ ]] || prev_high_critical=0 local issue_delta=$((sweep_total_issues - prev_issues)) local high_critical_delta=$((sweep_high_critical - prev_high_critical)) @@ -1872,12 +1974,17 @@ _Monitoring: ${sweep_total_issues} issues (delta: ${issue_delta}), gate ${sweep_ --json) || scan_output="" if [[ -n "$scan_output" ]] && echo "$scan_output" | jq -e '.scanned' &>/dev/null; then - local scanned - scanned=$(echo "$scan_output" | jq -r '.scanned // 0') - local scan_findings - scan_findings=$(echo "$scan_output" | jq -r '.findings // 0') - local scan_issues - scan_issues=$(echo "$scan_output" | jq -r '.issues_created // 0') + # Single jq pass: extract all three fields at once + local scan_data + scan_data=$(echo "$scan_output" | jq -r '"\(.scanned // 0)|\(.findings // 0)|\(.issues_created // 0)"') || scan_data="0|0|0" + local scanned="${scan_data%%|*}" + local remainder="${scan_data#*|}" + local scan_findings="${remainder%%|*}" + local scan_issues="${remainder#*|}" + # Validate integers before any arithmetic comparison + [[ "$scanned" =~ ^[0-9]+$ ]] || scanned=0 + [[ "$scan_findings" =~ ^[0-9]+$ ]] || scan_findings=0 + [[ "$scan_issues" =~ ^[0-9]+$ ]] || scan_issues=0 review_scan_section="### Merged PR Review Scanner @@ -1989,6 +2096,7 @@ cleanup_orphans() { fi # This is an orphan — kill it + [[ "$rss" =~ ^[0-9]+$ ]] || rss=0 local mb=$((rss / 1024)) kill "$pid" 2>/dev/null || true killed=$((killed + 1)) @@ -2012,6 +2120,7 @@ cleanup_orphans() { [[ "$age_seconds" -lt "$ORPHAN_MAX_AGE" ]] && continue kill "$pid" 2>/dev/null || true + [[ "$rss" =~ ^[0-9]+$ ]] || rss=0 local mb=$((rss / 1024)) killed=$((killed + 1)) total_mb=$((total_mb + mb)) @@ -2039,11 +2148,16 @@ calculate_max_workers() { page_size=$(sysctl -n hw.pagesize 2>/dev/null || echo 16384) free_pages=$(vm_stat 2>/dev/null | awk '/Pages free/ {gsub(/\./,"",$3); print $3}') inactive_pages=$(vm_stat 2>/dev/null | awk '/Pages 
inactive/ {gsub(/\./,"",$3); print $3}') + # Validate integers before arithmetic expansion + [[ "$page_size" =~ ^[0-9]+$ ]] || page_size=16384 + [[ "$free_pages" =~ ^[0-9]+$ ]] || free_pages=0 + [[ "$inactive_pages" =~ ^[0-9]+$ ]] || inactive_pages=0 free_mb=$(((free_pages + inactive_pages) * page_size / 1024 / 1024)) else # Linux: use MemAvailable from /proc/meminfo free_mb=$(awk '/MemAvailable/ {print int($2/1024)}' /proc/meminfo 2>/dev/null || echo 8192) fi + [[ "$free_mb" =~ ^[0-9]+$ ]] || free_mb=8192 local available_mb=$((free_mb - RAM_RESERVE_MB)) local max_workers=$((available_mb / RAM_PER_WORKER_MB))