From cc2c97a2c49d53d6cb5c0b4d426e39cb0dd7f5cc Mon Sep 17 00:00:00 2001 From: marcusquinn <6428977+marcusquinn@users.noreply.github.com> Date: Sat, 7 Mar 2026 17:44:16 +0000 Subject: [PATCH] perf: add shellcheck concurrency limiter, debounce, and orphan reaper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wrapper now limits concurrent shellcheck processes to 4 (configurable via SHELLCHECK_MAX_PARALLEL), debounces repeated checks on unchanged files within 10s (SHELLCHECK_DEBOUNCE_SEC), and uses fast-path .real binary lookup instead of expensive PATH scanning (3.4s -> 0.38s per invocation). The process guard now kills orphaned shellcheck processes (ppid=1, age >120s) whose parent language server exited — these accumulate indefinitely and waste CPU competing with each other. Measured improvement: - Cold run: 3.4s -> 0.38s (9x faster) - Debounced: 3.4s -> 0.18s (19x faster) - 8 concurrent: 27s -> 1.5s (18x faster) - Orphan cleanup: 120 -> 2 processes on first run --- .agents/scripts/process-guard-helper.sh | 50 +++++--- .agents/scripts/shellcheck-wrapper.sh | 159 ++++++++++++++++++++++-- 2 files changed, 186 insertions(+), 23 deletions(-) diff --git a/.agents/scripts/process-guard-helper.sh b/.agents/scripts/process-guard-helper.sh index 424a3dbfd..2299a75b0 100755 --- a/.agents/scripts/process-guard-helper.sh +++ b/.agents/scripts/process-guard-helper.sh @@ -138,13 +138,13 @@ cmd_scan() { local violations=0 echo "--- AI Processes ---" - printf "%-8s %-6s %-10s %-5s %-12s %-8s %s\n" "PID" "RSS_MB" "RUNTIME" "TTY" "COMMAND" "STATUS" "DETAIL" + printf "%-8s %-6s %-6s %-10s %-5s %-12s %-8s %s\n" "PID" "PPID" "RSS_MB" "RUNTIME" "TTY" "COMMAND" "STATUS" "DETAIL" while IFS= read -r line; do [[ -z "$line" ]] && continue - # Fields: pid, tty, rss, etime, command (command is last — may contain spaces) - local pid tty rss etime cmd_full - read -r pid tty rss etime cmd_full <<<"$line" + # Fields: pid, ppid, tty, rss, etime, command (command is last — may contain spaces) + local pid ppid tty rss etime cmd_full + read -r pid ppid tty rss etime cmd_full <<<"$line" [[ "$pid" =~ ^[0-9]+$ ]] || continue [[ "$rss" =~ ^[0-9]+$ ]] || rss=0 @@ -181,10 +181,17 @@ cmd_scan() { status="OVER_TIME" detail="runtime ${age_seconds}s > ${runtime_limit}s" violations=$((violations + 1)) + elif [[ "$cmd_base" == "shellcheck" ]]; then + [[ "$ppid" =~ ^[0-9]+$ ]] || ppid=0 + if [[ "$ppid" -eq 1 ]] && [[ "$age_seconds" -gt 120 ]]; then + status="ORPHAN" + detail="ppid=1, age=${age_seconds}s (no consumer)" + violations=$((violations + 1)) + fi fi - printf "%-8s %-6s %-10s %-5s %-12s %-8s %s\n" "$pid" "${rss_mb}MB" "$etime" "$tty" "$cmd_base" "$status" "$detail" - done < <(ps axo pid,tty,rss,etime,command | grep -E 'opencode|shellcheck|node.*opencode' | grep -v grep || true) + printf "%-8s %-6s %-6s %-10s %-5s %-12s %-8s %s\n" "$pid" "$ppid" "${rss_mb}MB" "$etime" "$tty" "$cmd_base" "$status" "$detail" + done < <(ps axo pid,ppid,tty,rss,etime,command | grep -E 'opencode|shellcheck|node.*opencode' | grep -v grep || true) echo "" echo "Total: ${process_count} processes, $((total_rss_kb / 1024))MB RSS, ${violations} violation(s)" @@ -215,9 +222,9 @@ cmd_kill_runaways() { while IFS= read -r line; do [[ -z "$line" ]] && continue - # Fields: pid, tty, rss, etime, command (command is last — may contain spaces) - local pid tty rss etime cmd_full - read -r pid tty rss etime cmd_full <<<"$line" + # Fields: pid, ppid, tty, rss, etime, command (command is last — may contain spaces) + local pid ppid tty rss etime cmd_full + read -r pid ppid tty rss etime cmd_full <<<"$line" [[ "$pid" =~ ^[0-9]+$ ]] || continue [[ "$rss" =~ ^[0-9]+$ ]] || rss=0 @@ -248,6 +255,16 @@ cmd_kill_runaways() { violation="runtime ${age_seconds}s > ${runtime_limit}s" fi + # Orphan shellcheck reaper: if parent is PID 1 (reparented because the + # language server that spawned it exited) and alive >120s, kill it. + # Nobody is reading the output, so the work is wasted CPU. + if [[ -z "$violation" ]] && [[ "$cmd_base" == "shellcheck" ]]; then + [[ "$ppid" =~ ^[0-9]+$ ]] || ppid=0 + if [[ "$ppid" -eq 1 ]] && [[ "$age_seconds" -gt 120 ]]; then + violation="orphan (ppid=1, age=${age_seconds}s)" + fi + fi + if [[ -n "$violation" ]]; then local rss_mb=$((rss / 1024)) echo "Killing PID $pid ($cmd_base) — $violation" @@ -260,7 +277,7 @@ cmd_kill_runaways() { killed=$((killed + 1)) total_freed_mb=$((total_freed_mb + rss_mb)) fi - done < <(ps axo pid,tty,rss,etime,command | grep -E 'opencode|shellcheck|node.*opencode' | grep -v grep || true) + done < <(ps axo pid,ppid,tty,rss,etime,command | grep -E 'opencode|shellcheck|node.*opencode' | grep -v grep || true) if [[ "$killed" -gt 0 ]]; then echo "Killed $killed process(es), freed ~${total_freed_mb}MB" @@ -302,9 +319,9 @@ cmd_status() { while IFS= read -r line; do [[ -z "$line" ]] && continue - # Fields: pid, tty, rss, etime, command (command is last — may contain spaces) - local pid tty rss etime cmd_full - read -r pid tty rss etime cmd_full <<<"$line" + # Fields: pid, ppid, tty, rss, etime, command (command is last — may contain spaces) + local pid ppid tty rss etime cmd_full + read -r pid ppid tty rss etime cmd_full <<<"$line" [[ "$rss" =~ ^[0-9]+$ ]] || rss=0 total_rss_kb=$((total_rss_kb + rss)) process_count=$((process_count + 1)) @@ -326,8 +343,13 @@ cmd_status() { fi if [[ "$rss" -gt "$rss_limit" ]] || [[ "$age_seconds" -gt "$runtime_limit" ]]; then violations=$((violations + 1)) + elif [[ "$cmd_base" == "shellcheck" ]]; then + [[ "$ppid" =~ ^[0-9]+$ ]] || ppid=0 + if [[ "$ppid" -eq 1 ]] && [[ "$age_seconds" -gt 120 ]]; then + violations=$((violations + 1)) + fi fi - done < <(ps axo pid,tty,rss,etime,command | grep -E 'opencode|shellcheck|node.*opencode' | grep -v grep || true) + done < <(ps axo pid,ppid,tty,rss,etime,command | grep -E 'opencode|shellcheck|node.*opencode' | grep -v grep || true) local session_count session_count=$(ps axo tty,command | awk ' diff --git a/.agents/scripts/shellcheck-wrapper.sh b/.agents/scripts/shellcheck-wrapper.sh index 233081578..aa87ebb14 100755 --- a/.agents/scripts/shellcheck-wrapper.sh +++ b/.agents/scripts/shellcheck-wrapper.sh @@ -8,20 +8,30 @@ # --external-sources to prevent any residual source-following expansion. # # This wrapper strips --external-sources from the arguments before passing them -# to the real ShellCheck binary. It also enforces a memory limit via ulimit. +# to the real ShellCheck binary. It also enforces a memory limit via ulimit, +# limits concurrent shellcheck processes, and debounces repeated checks on +# unchanged files. # # Usage: # Set SHELLCHECK_PATH to this script's path, or place it earlier on PATH as # "shellcheck". The bash language server will use it instead of the real binary. # # Environment variables: -# SHELLCHECK_REAL_PATH — Path to the real shellcheck binary (auto-detected) -# SHELLCHECK_VMEM_MB — Virtual memory limit in MB (default: 2048) +# SHELLCHECK_REAL_PATH — Path to the real shellcheck binary (auto-detected) +# SHELLCHECK_VMEM_MB — Virtual memory limit in MB (default: 2048) +# SHELLCHECK_MAX_PARALLEL — Max concurrent shellcheck processes (default: 4) +# SHELLCHECK_DEBOUNCE_SEC — Skip re-check if file unchanged within N sec (default: 10) # # GH#2915: https://github.com/marcusquinn/aidevops/issues/2915 set -uo pipefail +# --- Configuration --- +_SC_MAX_PARALLEL="${SHELLCHECK_MAX_PARALLEL:-4}" +_SC_DEBOUNCE_SEC="${SHELLCHECK_DEBOUNCE_SEC:-10}" +_SC_CACHE_DIR="/tmp/shellcheck-wrapper-cache" +_SC_LOCK_DIR="/tmp/shellcheck-wrapper-locks" + # --- Find the real ShellCheck binary --- _find_real_shellcheck() { local real_path="${SHELLCHECK_REAL_PATH:-}" @@ -31,7 +41,27 @@ _find_real_shellcheck() { return 0 fi - # Search PATH, skipping this wrapper script + # Fast path: check .real sibling of this script's location first. + # When setup.sh replaces the binary, it moves it to shellcheck.real. + # This avoids expensive PATH scanning and realpath resolution. + local self_dir + self_dir="$(dirname "${BASH_SOURCE[0]}" 2>/dev/null || echo ".")" + local sibling="${self_dir}/shellcheck.real" + if [[ -x "$sibling" ]]; then + printf '%s' "$sibling" + return 0 + fi + + # Fast path: check common .real locations before PATH scanning + local loc + for loc in /opt/homebrew/bin/shellcheck.real /usr/local/bin/shellcheck.real; do + if [[ -x "$loc" ]]; then + printf '%s' "$loc" + return 0 + fi + done + + # Slow path: search PATH, skipping this wrapper script local self self="$(realpath "${BASH_SOURCE[0]}" 2>/dev/null || readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "${BASH_SOURCE[0]}")" @@ -48,9 +78,8 @@ _find_real_shellcheck() { fi done <<<"$PATH" - # Common locations (including .real suffix for binary-replacement installs) - local loc - for loc in /opt/homebrew/bin/shellcheck.real /opt/homebrew/bin/shellcheck /usr/local/bin/shellcheck.real /usr/local/bin/shellcheck /usr/bin/shellcheck; do + # Last resort: common locations without .real suffix + for loc in /opt/homebrew/bin/shellcheck /usr/local/bin/shellcheck /usr/bin/shellcheck; do if [[ -x "$loc" ]]; then local resolved resolved="$(realpath "$loc" 2>/dev/null || readlink -f "$loc" 2>/dev/null || echo "$loc")" @@ -65,7 +94,7 @@ _find_real_shellcheck() { return 1 } -# --- Filter arguments --- +# --- Filter arguments and extract target file --- _filter_args() { local args=() while [[ $# -gt 0 ]]; do @@ -82,6 +111,85 @@ _filter_args() { printf '%s\n' "${args[@]}" } +# --- Debounce: skip if file unchanged since last check --- +# Returns 0 if check should be skipped (file unchanged), 1 if check needed. +# Reads stdin for the target file path when called from a pipe. +_debounce_check() { + local target_file="$1" + + # Only debounce for real files (not stdin "-") + [[ -z "$target_file" || "$target_file" == "-" ]] && return 1 + [[ -f "$target_file" ]] || return 1 + + mkdir -p "$_SC_CACHE_DIR" 2>/dev/null || return 1 + + # Hash the absolute path for the cache key + local cache_key + cache_key=$(printf '%s' "$target_file" | cksum | awk '{print $1}') + local cache_file="${_SC_CACHE_DIR}/${cache_key}" + + # Get file modification time (seconds since epoch) + local file_mtime + if [[ "$(uname)" == "Darwin" ]]; then + file_mtime=$(stat -f '%m' "$target_file" 2>/dev/null) || return 1 + else + file_mtime=$(stat -c '%Y' "$target_file" 2>/dev/null) || return 1 + fi + + # Check cache: if cache exists, file hasn't changed, and within debounce window + if [[ -f "$cache_file" ]]; then + local cached_mtime cached_time + read -r cached_mtime cached_time 2>/dev/null <"$cache_file" || return 1 + local now + now=$(date +%s) + if [[ "$cached_mtime" == "$file_mtime" ]] && [[ $((now - cached_time)) -lt $_SC_DEBOUNCE_SEC ]]; then + # File unchanged and within debounce window — skip + return 0 + fi + fi + + # Update cache with current mtime and timestamp + printf '%s %s' "$file_mtime" "$(date +%s)" >"$cache_file" 2>/dev/null || true + return 1 +} + +# --- Concurrency limiter: acquire a slot or exit --- +# Uses mkdir-based locks (atomic on all filesystems). +# Returns 0 if slot acquired (caller must release), 1 if all slots busy. +_acquire_slot() { + mkdir -p "$_SC_LOCK_DIR" 2>/dev/null || return 1 + + local slot + for slot in $(seq 1 "$_SC_MAX_PARALLEL"); do + local lock="${_SC_LOCK_DIR}/slot-${slot}" + if mkdir "$lock" 2>/dev/null; then + # Got a slot — record PID for stale lock cleanup + printf '%s' "$$" >"${lock}/pid" 2>/dev/null || true + printf '%s' "$slot" + return 0 + fi + # Check if the lock holder is still alive (stale lock cleanup) + local holder_pid + holder_pid=$(cat "${lock}/pid" 2>/dev/null) || continue + if [[ -n "$holder_pid" ]] && ! kill -0 "$holder_pid" 2>/dev/null; then + # Stale lock — remove and retry + rm -rf "$lock" 2>/dev/null || true + if mkdir "$lock" 2>/dev/null; then + printf '%s' "$$" >"${lock}/pid" 2>/dev/null || true + printf '%s' "$slot" + return 0 + fi + fi + done + return 1 +} + +_release_slot() { + local slot="$1" + rm -rf "${_SC_LOCK_DIR}/slot-${slot}" 2>/dev/null || true + return 0 +} + # --- Main --- main() { local real_shellcheck @@ -93,13 +201,46 @@ main() { filtered_args+=("$arg") done < <(_filter_args "$@") + # Find the target file (last non-flag argument, or "-" for stdin) + local target_file="" + local i + for i in "${filtered_args[@]+"${filtered_args[@]}"}"; do + case "$i" in + -*) ;; # skip flags + *) target_file="$i" ;; # last positional arg is the file + esac + done + + # Debounce: skip if file unchanged since last check + if [[ -n "$target_file" ]] && _debounce_check "$target_file"; then + exit 0 + fi + + # Concurrency limit: wait briefly for a slot, then give up + local slot="" + local attempt + for attempt in 1 2 3; do + slot=$(_acquire_slot) && break + slot="" + sleep 1 + done + + if [[ -z "$slot" ]]; then + # All slots busy after 3 attempts — exit silently. + # No output = no diagnostics, which is better than 100 competing processes. + exit 0 + fi + + # Ensure slot is released on exit (normal, error, or signal) + trap '_release_slot "'"$slot"'"' EXIT + # Enforce memory limit (soft limit — ShellCheck can still be killed by the # memory pressure monitor if it exceeds this, but this prevents the worst case) local vmem_mb="${SHELLCHECK_VMEM_MB:-2048}" local vmem_kb=$((vmem_mb * 1024)) ulimit -v "$vmem_kb" 2>/dev/null || true - exec "$real_shellcheck" "${filtered_args[@]+"${filtered_args[@]}"}" + "$real_shellcheck" "${filtered_args[@]+"${filtered_args[@]}"}" } main "$@"