Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 36 additions & 14 deletions .agents/scripts/process-guard-helper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,13 @@ cmd_scan() {
local violations=0

echo "--- AI Processes ---"
printf "%-8s %-6s %-10s %-5s %-12s %-8s %s\n" "PID" "RSS_MB" "RUNTIME" "TTY" "COMMAND" "STATUS" "DETAIL"
printf "%-8s %-6s %-6s %-10s %-5s %-12s %-8s %s\n" "PID" "PPID" "RSS_MB" "RUNTIME" "TTY" "COMMAND" "STATUS" "DETAIL"

while IFS= read -r line; do
[[ -z "$line" ]] && continue
# Fields: pid, tty, rss, etime, command (command is last — may contain spaces)
local pid tty rss etime cmd_full
read -r pid tty rss etime cmd_full <<<"$line"
# Fields: pid, ppid, tty, rss, etime, command (command is last — may contain spaces)
local pid ppid tty rss etime cmd_full
read -r pid ppid tty rss etime cmd_full <<<"$line"

[[ "$pid" =~ ^[0-9]+$ ]] || continue
[[ "$rss" =~ ^[0-9]+$ ]] || rss=0
Expand Down Expand Up @@ -181,10 +181,17 @@ cmd_scan() {
status="OVER_TIME"
detail="runtime ${age_seconds}s > ${runtime_limit}s"
violations=$((violations + 1))
elif [[ "$cmd_base" == "shellcheck" ]]; then
[[ "$ppid" =~ ^[0-9]+$ ]] || ppid=0
if [[ "$ppid" -eq 1 ]] && [[ "$age_seconds" -gt 120 ]]; then
status="ORPHAN"
detail="ppid=1, age=${age_seconds}s (no consumer)"
violations=$((violations + 1))
fi
fi

printf "%-8s %-6s %-10s %-5s %-12s %-8s %s\n" "$pid" "${rss_mb}MB" "$etime" "$tty" "$cmd_base" "$status" "$detail"
done < <(ps axo pid,tty,rss,etime,command | grep -E 'opencode|shellcheck|node.*opencode' | grep -v grep || true)
printf "%-8s %-6s %-6s %-10s %-5s %-12s %-8s %s\n" "$pid" "$ppid" "${rss_mb}MB" "$etime" "$tty" "$cmd_base" "$status" "$detail"
done < <(ps axo pid,ppid,tty,rss,etime,command | grep -E 'opencode|shellcheck|node.*opencode' | grep -v grep || true)

echo ""
echo "Total: ${process_count} processes, $((total_rss_kb / 1024))MB RSS, ${violations} violation(s)"
Expand Down Expand Up @@ -215,9 +222,9 @@ cmd_kill_runaways() {

while IFS= read -r line; do
[[ -z "$line" ]] && continue
# Fields: pid, tty, rss, etime, command (command is last — may contain spaces)
local pid tty rss etime cmd_full
read -r pid tty rss etime cmd_full <<<"$line"
# Fields: pid, ppid, tty, rss, etime, command (command is last — may contain spaces)
local pid ppid tty rss etime cmd_full
read -r pid ppid tty rss etime cmd_full <<<"$line"

[[ "$pid" =~ ^[0-9]+$ ]] || continue
[[ "$rss" =~ ^[0-9]+$ ]] || rss=0
Expand Down Expand Up @@ -248,6 +255,16 @@ cmd_kill_runaways() {
violation="runtime ${age_seconds}s > ${runtime_limit}s"
fi

# Orphan shellcheck reaper: if parent is PID 1 (reparented because the
# language server that spawned it exited) and alive >120s, kill it.
# Nobody is reading the output, so the work is wasted CPU.
if [[ -z "$violation" ]] && [[ "$cmd_base" == "shellcheck" ]]; then
[[ "$ppid" =~ ^[0-9]+$ ]] || ppid=0
if [[ "$ppid" -eq 1 ]] && [[ "$age_seconds" -gt 120 ]]; then
violation="orphan (ppid=1, age=${age_seconds}s)"
fi
fi

if [[ -n "$violation" ]]; then
local rss_mb=$((rss / 1024))
echo "Killing PID $pid ($cmd_base) — $violation"
Expand All @@ -260,7 +277,7 @@ cmd_kill_runaways() {
killed=$((killed + 1))
total_freed_mb=$((total_freed_mb + rss_mb))
fi
done < <(ps axo pid,tty,rss,etime,command | grep -E 'opencode|shellcheck|node.*opencode' | grep -v grep || true)
done < <(ps axo pid,ppid,tty,rss,etime,command | grep -E 'opencode|shellcheck|node.*opencode' | grep -v grep || true)

if [[ "$killed" -gt 0 ]]; then
echo "Killed $killed process(es), freed ~${total_freed_mb}MB"
Expand Down Expand Up @@ -302,9 +319,9 @@ cmd_status() {

while IFS= read -r line; do
[[ -z "$line" ]] && continue
# Fields: pid, tty, rss, etime, command (command is last — may contain spaces)
local pid tty rss etime cmd_full
read -r pid tty rss etime cmd_full <<<"$line"
# Fields: pid, ppid, tty, rss, etime, command (command is last — may contain spaces)
local pid ppid tty rss etime cmd_full
read -r pid ppid tty rss etime cmd_full <<<"$line"
[[ "$rss" =~ ^[0-9]+$ ]] || rss=0
total_rss_kb=$((total_rss_kb + rss))
process_count=$((process_count + 1))
Expand All @@ -326,8 +343,13 @@ cmd_status() {
fi
if [[ "$rss" -gt "$rss_limit" ]] || [[ "$age_seconds" -gt "$runtime_limit" ]]; then
violations=$((violations + 1))
elif [[ "$cmd_base" == "shellcheck" ]]; then
[[ "$ppid" =~ ^[0-9]+$ ]] || ppid=0
if [[ "$ppid" -eq 1 ]] && [[ "$age_seconds" -gt 120 ]]; then
violations=$((violations + 1))
fi
fi
done < <(ps axo pid,tty,rss,etime,command | grep -E 'opencode|shellcheck|node.*opencode' | grep -v grep || true)
done < <(ps axo pid,ppid,tty,rss,etime,command | grep -E 'opencode|shellcheck|node.*opencode' | grep -v grep || true)

local session_count
session_count=$(ps axo tty,command | awk '
Expand Down
159 changes: 150 additions & 9 deletions .agents/scripts/shellcheck-wrapper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,30 @@
# --external-sources to prevent any residual source-following expansion.
#
# This wrapper strips --external-sources from the arguments before passing them
# to the real ShellCheck binary. It also enforces a memory limit via ulimit.
# to the real ShellCheck binary. It also enforces a memory limit via ulimit,
# limits concurrent shellcheck processes, and debounces repeated checks on
# unchanged files.
#
# Usage:
# Set SHELLCHECK_PATH to this script's path, or place it earlier on PATH as
# "shellcheck". The bash language server will use it instead of the real binary.
#
# Environment variables:
# SHELLCHECK_REAL_PATH — Path to the real shellcheck binary (auto-detected)
# SHELLCHECK_VMEM_MB — Virtual memory limit in MB (default: 2048)
# SHELLCHECK_REAL_PATH — Path to the real shellcheck binary (auto-detected)
# SHELLCHECK_VMEM_MB — Virtual memory limit in MB (default: 2048)
# SHELLCHECK_MAX_PARALLEL — Max concurrent shellcheck processes (default: 4)
# SHELLCHECK_DEBOUNCE_SEC — Skip re-check if file unchanged within N sec (default: 10)
#
# GH#2915: https://github.com/marcusquinn/aidevops/issues/2915

set -uo pipefail

# --- Configuration ---
_SC_MAX_PARALLEL="${SHELLCHECK_MAX_PARALLEL:-4}"
_SC_DEBOUNCE_SEC="${SHELLCHECK_DEBOUNCE_SEC:-10}"
_SC_CACHE_DIR="/tmp/shellcheck-wrapper-cache"
_SC_LOCK_DIR="/tmp/shellcheck-wrapper-locks"

# --- Find the real ShellCheck binary ---
_find_real_shellcheck() {
local real_path="${SHELLCHECK_REAL_PATH:-}"
Expand All @@ -31,7 +41,27 @@ _find_real_shellcheck() {
return 0
fi

# Search PATH, skipping this wrapper script
# Fast path: check .real sibling of this script's location first.
# When setup.sh replaces the binary, it moves it to shellcheck.real.
# This avoids expensive PATH scanning and realpath resolution.
local self_dir
self_dir="$(dirname "${BASH_SOURCE[0]}" 2>/dev/null || echo ".")"
local sibling="${self_dir}/shellcheck.real"
if [[ -x "$sibling" ]]; then
printf '%s' "$sibling"
return 0
fi

# Fast path: check common .real locations before PATH scanning
local loc
for loc in /opt/homebrew/bin/shellcheck.real /usr/local/bin/shellcheck.real; do
if [[ -x "$loc" ]]; then
printf '%s' "$loc"
return 0
fi
done

# Slow path: search PATH, skipping this wrapper script
local self
self="$(realpath "${BASH_SOURCE[0]}" 2>/dev/null || readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "${BASH_SOURCE[0]}")"

Expand All @@ -48,9 +78,8 @@ _find_real_shellcheck() {
fi
done <<<"$PATH"

# Common locations (including .real suffix for binary-replacement installs)
local loc
for loc in /opt/homebrew/bin/shellcheck.real /opt/homebrew/bin/shellcheck /usr/local/bin/shellcheck.real /usr/local/bin/shellcheck /usr/bin/shellcheck; do
# Last resort: common locations without .real suffix
for loc in /opt/homebrew/bin/shellcheck /usr/local/bin/shellcheck /usr/bin/shellcheck; do
if [[ -x "$loc" ]]; then
local resolved
resolved="$(realpath "$loc" 2>/dev/null || readlink -f "$loc" 2>/dev/null || echo "$loc")"
Expand All @@ -65,7 +94,7 @@ _find_real_shellcheck() {
return 1
}

# --- Filter arguments ---
# --- Filter arguments and extract target file ---
_filter_args() {
local args=()
while [[ $# -gt 0 ]]; do
Expand All @@ -82,6 +111,85 @@ _filter_args() {
printf '%s\n' "${args[@]}"
}

# --- Debounce: skip if file unchanged since last check ---
# Returns 0 if check should be skipped (file unchanged), 1 if check needed.
# Reads stdin for the target file path when called from a pipe.
_debounce_check() {
local target_file="$1"

# Only debounce for real files (not stdin "-")
[[ -z "$target_file" || "$target_file" == "-" ]] && return 1
[[ -f "$target_file" ]] || return 1

mkdir -p "$_SC_CACHE_DIR" 2>/dev/null || return 1

# Hash the absolute path for the cache key
local cache_key
cache_key=$(printf '%s' "$target_file" | cksum | awk '{print $1}')
local cache_file="${_SC_CACHE_DIR}/${cache_key}"

# Get file modification time (seconds since epoch)
local file_mtime
if [[ "$(uname)" == "Darwin" ]]; then
file_mtime=$(stat -f '%m' "$target_file" 2>/dev/null) || return 1
else
file_mtime=$(stat -c '%Y' "$target_file" 2>/dev/null) || return 1
fi

# Check cache: if cache exists, file hasn't changed, and within debounce window
if [[ -f "$cache_file" ]]; then
local cached_mtime cached_time
read -r cached_mtime cached_time 2>/dev/null <"$cache_file" || return 1
local now
now=$(date +%s)
if [[ "$cached_mtime" == "$file_mtime" ]] && [[ $((now - cached_time)) -lt $_SC_DEBOUNCE_SEC ]]; then
# File unchanged and within debounce window — skip
return 0
fi
fi

# Update cache with current mtime and timestamp
printf '%s %s' "$file_mtime" "$(date +%s)" >"$cache_file" 2>/dev/null || true
return 1
}

# --- Concurrency limiter: acquire a slot or exit ---
# Uses mkdir-based locks (atomic on all filesystems).
# Returns 0 if slot acquired (caller must release), 1 if all slots busy.
_acquire_slot() {
mkdir -p "$_SC_LOCK_DIR" 2>/dev/null || return 1

local slot
for slot in $(seq 1 "$_SC_MAX_PARALLEL"); do
local lock="${_SC_LOCK_DIR}/slot-${slot}"
if mkdir "$lock" 2>/dev/null; then
# Got a slot — record PID for stale lock cleanup
printf '%s' "$$" >"${lock}/pid" 2>/dev/null || true
printf '%s' "$slot"
return 0
fi
# Check if the lock holder is still alive (stale lock cleanup)
local holder_pid
holder_pid=$(cat "${lock}/pid" 2>/dev/null) || continue
if [[ -n "$holder_pid" ]] && ! kill -0 "$holder_pid" 2>/dev/null; then
# Stale lock — remove and retry
rm -rf "$lock" 2>/dev/null || true
if mkdir "$lock" 2>/dev/null; then
printf '%s' "$$" >"${lock}/pid" 2>/dev/null || true
printf '%s' "$slot"
return 0
fi
fi
done
return 1
}

_release_slot() {
local slot="$1"
rm -rf "${_SC_LOCK_DIR}/slot-${slot}" 2>/dev/null || true
return 0
}

# --- Main ---
main() {
local real_shellcheck
Expand All @@ -93,13 +201,46 @@ main() {
filtered_args+=("$arg")
done < <(_filter_args "$@")

# Find the target file (last non-flag argument, or "-" for stdin)
local target_file=""
local i
for i in "${filtered_args[@]+"${filtered_args[@]}"}"; do
case "$i" in
-*) ;; # skip flags
*) target_file="$i" ;; # last positional arg is the file
esac
done

# Debounce: skip if file unchanged since last check
if [[ -n "$target_file" ]] && _debounce_check "$target_file"; then
exit 0
fi

# Concurrency limit: wait briefly for a slot, then give up
local slot=""
local attempt
for attempt in 1 2 3; do
slot=$(_acquire_slot) && break
slot=""
sleep 1
done

if [[ -z "$slot" ]]; then
# All slots busy after 3 attempts — exit silently.
# No output = no diagnostics, which is better than 100 competing processes.
exit 0
fi

# Ensure slot is released on exit (normal, error, or signal)
trap '_release_slot "'"$slot"'"' EXIT

# Enforce memory limit (soft limit — ShellCheck can still be killed by the
# memory pressure monitor if it exceeds this, but this prevents the worst case)
local vmem_mb="${SHELLCHECK_VMEM_MB:-2048}"
local vmem_kb=$((vmem_mb * 1024))
ulimit -v "$vmem_kb" 2>/dev/null || true

exec "$real_shellcheck" "${filtered_args[@]+"${filtered_args[@]}"}"
"$real_shellcheck" "${filtered_args[@]+"${filtered_args[@]}"}"
}

main "$@"
Loading