diff --git a/.agents/scripts/memory-pressure-monitor.sh b/.agents/scripts/memory-pressure-monitor.sh index a4640cbd1..88f1e8a1a 100755 --- a/.agents/scripts/memory-pressure-monitor.sh +++ b/.agents/scripts/memory-pressure-monitor.sh @@ -19,6 +19,10 @@ # .shellcheckrc causing recursive expansion) has been removed — SC1091 is # now globally disabled instead. This monitor remains as defense-in-depth. # +# The primary defense is now shellcheck-wrapper.sh which has a background +# RSS watchdog (kills at 1 GB) and respawn rate limiting (exponential backoff). +# This monitor is the secondary safety net with higher thresholds (2 GB). +# # kern.memorystatus_level is a secondary/informational signal only. macOS runs # fine with compression + swap; aggressive thresholds on that metric cause false # alarms. The primary signals are process-level. @@ -32,8 +36,8 @@ # memory-pressure-monitor.sh --help # Show usage # # Process-level thresholds (primary signals): -# RSS per process: > 2 GB → warning, > 4 GB → critical (kill candidate) -# Runtime (tools): > 10 min for shellcheck, > 30 min for other tools +# RSS per process: > 1 GB → warning, > 2 GB → critical (kill candidate) +# Runtime (tools): > 5 min for shellcheck, > 30 min for other tools # Runtime (apps): skipped — long-running by design # Session count: > 8 concurrent interactive sessions → warning # Total aidevops: > 8 GB aggregate RSS → warning @@ -43,9 +47,9 @@ # Swap file count: > 10 → informational warning # # Environment: -# PROCESS_RSS_WARN_MB Per-process RSS warning (default: 2048) -# PROCESS_RSS_CRIT_MB Per-process RSS critical (default: 4096) -# SHELLCHECK_RUNTIME_MAX ShellCheck max runtime in seconds (default: 600) +# PROCESS_RSS_WARN_MB Per-process RSS warning (default: 1024) +# PROCESS_RSS_CRIT_MB Per-process RSS critical (default: 2048) +# SHELLCHECK_RUNTIME_MAX ShellCheck max runtime in seconds (default: 300) # TOOL_RUNTIME_MAX Other tool max runtime in seconds (default: 1800) # SESSION_COUNT_WARN Interactive session warning threshold (default: 8) # AGGREGATE_RSS_WARN_MB Total aidevops RSS warning (default: 8192) @@ -59,14 +63,18 @@ set -euo pipefail # --- Configuration ----------------------------------------------------------- readonly SCRIPT_NAME="memory-pressure-monitor" -readonly SCRIPT_VERSION="2.0.0" +readonly SCRIPT_VERSION="2.1.0" # Per-process RSS thresholds (MB) -PROCESS_RSS_WARN_MB="${PROCESS_RSS_WARN_MB:-2048}" -PROCESS_RSS_CRIT_MB="${PROCESS_RSS_CRIT_MB:-4096}" +# Lowered from 2048/4096 after Mar 7 crash: shellcheck grew to 18.5 GB in <60s. +# The wrapper's watchdog kills at 1 GB; these are the secondary safety net. +PROCESS_RSS_WARN_MB="${PROCESS_RSS_WARN_MB:-1024}" +PROCESS_RSS_CRIT_MB="${PROCESS_RSS_CRIT_MB:-2048}" # Runtime thresholds (seconds) -SHELLCHECK_RUNTIME_MAX="${SHELLCHECK_RUNTIME_MAX:-600}" # 10 min +# Lowered shellcheck from 600s to 300s — wrapper has 120s hard timeout, +# so any shellcheck surviving 5 min has bypassed the wrapper. +SHELLCHECK_RUNTIME_MAX="${SHELLCHECK_RUNTIME_MAX:-300}" # 5 min TOOL_RUNTIME_MAX="${TOOL_RUNTIME_MAX:-1800}" # 30 min # Session/aggregate thresholds @@ -131,9 +139,9 @@ _validate_int() { return 0 } -PROCESS_RSS_WARN_MB=$(_validate_int PROCESS_RSS_WARN_MB "$PROCESS_RSS_WARN_MB" 2048 256) -PROCESS_RSS_CRIT_MB=$(_validate_int PROCESS_RSS_CRIT_MB "$PROCESS_RSS_CRIT_MB" 4096 512) -SHELLCHECK_RUNTIME_MAX=$(_validate_int SHELLCHECK_RUNTIME_MAX "$SHELLCHECK_RUNTIME_MAX" 600 60) +PROCESS_RSS_WARN_MB=$(_validate_int PROCESS_RSS_WARN_MB "$PROCESS_RSS_WARN_MB" 1024 256) +PROCESS_RSS_CRIT_MB=$(_validate_int PROCESS_RSS_CRIT_MB "$PROCESS_RSS_CRIT_MB" 2048 512) +SHELLCHECK_RUNTIME_MAX=$(_validate_int SHELLCHECK_RUNTIME_MAX "$SHELLCHECK_RUNTIME_MAX" 300 60) TOOL_RUNTIME_MAX=$(_validate_int TOOL_RUNTIME_MAX "$TOOL_RUNTIME_MAX" 1800 120) SESSION_COUNT_WARN=$(_validate_int SESSION_COUNT_WARN "$SESSION_COUNT_WARN" 8 2) AGGREGATE_RSS_WARN_MB=$(_validate_int AGGREGATE_RSS_WARN_MB "$AGGREGATE_RSS_WARN_MB" 8192 1024) @@ -143,6 +151,8 @@ readonly COOLDOWN_SECS DAEMON_INTERVAL # --- Helpers ------------------------------------------------------------------ +# Log a timestamped message to the log file +# Arguments: $1=level (INFO|WARN|CRIT), remaining args=message text log_msg() { local level="$1" shift @@ -152,6 +162,7 @@ log_msg() { return 0 } +# Create required log and state directories if they don't exist ensure_dirs() { mkdir -p "${LOG_DIR}" "${STATE_DIR}" return 0 @@ -225,6 +236,8 @@ check_cooldown() { return 0 } +# Record current time as cooldown start for a notification category +# Arguments: $1=category name (used as filename suffix) set_cooldown() { local category="$1" local cooldown_file="${STATE_DIR}/memory-pressure-${category}.cooldown" @@ -232,6 +245,8 @@ set_cooldown() { return 0 } +# Remove cooldown file for a category, allowing immediate re-notification +# Arguments: $1=category name (used as filename suffix) clear_cooldown() { local category="$1" local cooldown_file="${STATE_DIR}/memory-pressure-${category}.cooldown" @@ -347,14 +362,20 @@ _is_app_process() { # Collect all monitored processes with their RSS and runtime # Output: one line per process: PID|RSS_MB|RUNTIME_SECS|COMMAND_NAME|FULL_COMMAND +# +# Pattern matching: MONITORED_PATTERNS are matched against the COMMAND BASENAME +# only (not the full command line with arguments). This prevents false positives +# like zsh processes whose arguments contain "opencode" (e.g., `zsh -l -c opencode`). _collect_monitored_processes() { + # Collect all processes once, then filter by pattern against basename + local ps_output + ps_output=$(ps axo pid=,rss=,command= 2>/dev/null || true) + + # Track PIDs we've already emitted to avoid duplicates from overlapping patterns + local -a seen_pids=() + local pattern for pattern in "${MONITORED_PATTERNS[@]}"; do - # Use ps to get PID, RSS (KB), and command - # Filter by pattern, exclude grep itself and this script - local ps_output - ps_output=$(ps axo pid=,rss=,command= 2>/dev/null | grep -iE "$pattern" | grep -v "grep" | grep -v "${SCRIPT_NAME}" || true) - while IFS= read -r line; do [[ -z "$line" ]] && continue # Parse with read builtin — avoids spawning echo/awk/cut subshells per line @@ -365,18 +386,60 @@ _collect_monitored_processes() { [[ "$pid" =~ ^[0-9]+$ ]] || continue [[ "$rss_kb" =~ ^[0-9]+$ ]] || rss_kb=0 - local rss_mb=$((rss_kb / 1024)) - local runtime - runtime=$(_get_process_age "$pid") - - # Extract short command name via parameter expansion (no subshell) + # Extract short command name (basename of the executable path) local cmd_path="${cmd%% *}" local cmd_name cmd_name=$(basename "$cmd_path" 2>/dev/null || echo "unknown") + # Match pattern against the command basename, NOT the full command line. + # This prevents false positives like `zsh -l -c "opencode"` matching + # the "opencode" pattern — zsh is not an opencode process. + # Exception: patterns containing ".*" (regex) are matched against full + # command for cases like "node.*language-server". + local match=false + if [[ "$pattern" == *".*"* ]]; then + # Regex pattern — match against full command line + if echo "$cmd" | grep -iqE "$pattern"; then + match=true + fi + else + # Simple pattern — match against basename only + local cmd_lower pattern_lower + cmd_lower=$(printf '%s' "$cmd_name" | tr '[:upper:]' '[:lower:]') + pattern_lower=$(printf '%s' "$pattern" | tr '[:upper:]' '[:lower:]') + if [[ "$cmd_lower" == *"$pattern_lower"* ]]; then + match=true + fi + fi + + if [[ "$match" != "true" ]]; then + continue + fi + + # Skip grep, this script, and already-seen PIDs + if [[ "$cmd_name" == "grep" ]] || [[ "$cmd" == *"${SCRIPT_NAME}"* ]]; then + continue + fi + local seen_pid + local is_dup=false + for seen_pid in "${seen_pids[@]+"${seen_pids[@]}"}"; do + if [[ "$seen_pid" == "$pid" ]]; then + is_dup=true + break + fi + done + if [[ "$is_dup" == "true" ]]; then + continue + fi + seen_pids+=("$pid") + + local rss_mb=$((rss_kb / 1024)) + local runtime + runtime=$(_get_process_age "$pid") + printf '%s|%s|%s|%s|%s\n' "$pid" "$rss_mb" "$runtime" "$cmd_name" "$cmd" done <<<"$ps_output" - done | sort -t'|' -k2 -rn | uniq + done | sort -t'|' -k2 -rn return 0 } @@ -611,6 +674,8 @@ do_check() { # --- Commands ----------------------------------------------------------------- +# Run a single check pass — collect processes, evaluate thresholds, notify/kill +# Returns: 0=ok, 1=warnings found, 2=critical findings cmd_check() { # do_check returns non-zero for warnings/critical — that's informational, # not a script failure. Capture the exit code for callers that want it. @@ -619,6 +684,7 @@ cmd_check() { return "$exit_code" } +# Print detailed status of all monitored processes, sessions, and OS memory cmd_status() { ensure_dirs @@ -731,16 +797,28 @@ cmd_status() { return 0 } +# Run continuous monitoring loop with adaptive polling (faster when shellcheck detected) cmd_daemon() { - echo "[${SCRIPT_NAME}] Starting daemon mode (interval: ${DAEMON_INTERVAL}s)" + echo "[${SCRIPT_NAME}] Starting daemon mode (interval: ${DAEMON_INTERVAL}s, fast: 10s when shellcheck detected)" echo "[${SCRIPT_NAME}] Press Ctrl+C to stop" while true; do - cmd_check || true - sleep "${DAEMON_INTERVAL}" + local check_exit=0 + cmd_check || check_exit=$? + + # Adaptive polling: if shellcheck processes are running, poll every 10s + # instead of the normal 60s interval. ShellCheck can grow from 0 to 18 GB + # in under 60s (observed Mar 7 crash), so the normal interval is too slow. + local interval="$DAEMON_INTERVAL" + if pgrep -x shellcheck >/dev/null 2>&1; then + interval=10 + fi + + sleep "$interval" done } +# Install launchd plist for periodic monitoring (every 30 seconds) cmd_install() { # Resolve script path — prefer installed location local script_path @@ -770,7 +848,7 @@ cmd_install() { ${script_path} StartInterval - 60 + 30 StandardOutPath ${home_escaped}/.aidevops/logs/memory-pressure-launchd.log StandardErrorPath @@ -803,10 +881,11 @@ EOF echo "Installed and loaded: ${LAUNCHD_LABEL}" echo "Plist: ${PLIST_PATH}" echo "Log: ${LOG_FILE}" - echo "Check interval: 60 seconds" + echo "Check interval: 30 seconds" return 0 } +# Remove launchd plist and clean up state files cmd_uninstall() { if [[ -f "${PLIST_PATH}" ]]; then launchctl bootout "gui/$(id -u)" "${PLIST_PATH}" 2>/dev/null || true @@ -822,6 +901,7 @@ cmd_uninstall() { return 0 } +# Display usage information and current configuration cmd_help() { cat <&2 + printf '%s' "$default" + return 0 + fi + local canonical=$((10#$value)) + if ((canonical < min)); then + echo "shellcheck-wrapper: WARN: ${name}=${canonical} below minimum ${min}, using ${default}" >&2 + printf '%s' "$default" + return 0 + fi + printf '%s' "$canonical" + return 0 +} + +# --- Configuration --- +RSS_LIMIT_MB="$(_validate_int SHELLCHECK_RSS_LIMIT_MB "${SHELLCHECK_RSS_LIMIT_MB:-1024}" 1024 128)" +readonly RSS_LIMIT_MB +WATCHDOG_INTERVAL="$(_validate_int SHELLCHECK_WATCHDOG_SEC "${SHELLCHECK_WATCHDOG_SEC:-2}" 2 1)" +readonly WATCHDOG_INTERVAL +HARD_TIMEOUT="$(_validate_int SHELLCHECK_TIMEOUT_SEC "${SHELLCHECK_TIMEOUT_SEC:-120}" 120 10)" +readonly HARD_TIMEOUT +readonly BACKOFF_DIR="${SHELLCHECK_BACKOFF_DIR:-${HOME}/.aidevops/.agent-workspace/tmp}" +readonly BACKOFF_FILE="${BACKOFF_DIR}/shellcheck-backoff" +readonly MAX_BACKOFF=300 # 5 minutes max backoff + # --- Find the real ShellCheck binary --- _find_real_shellcheck() { local real_path="${SHELLCHECK_REAL_PATH:-}" @@ -82,7 +121,142 @@ _filter_args() { printf '%s\n' "${args[@]}" } +# --- Respawn rate limiter --- +# Tracks recent kills via a state file. If shellcheck was killed recently, +# delay before allowing the next invocation. Uses exponential backoff: +# 1st kill: 5s, 2nd: 10s, 3rd: 20s, ... up to MAX_BACKOFF (300s). +# Resets after MAX_BACKOFF seconds of no kills. +_check_rate_limit() { + mkdir -p "$BACKOFF_DIR" || return 0 + + if [[ ! -f "$BACKOFF_FILE" ]]; then + return 0 + fi + + local kill_count last_kill_time + # File format: "kill_count timestamp" + read -r kill_count last_kill_time <"$BACKOFF_FILE" 2>/dev/null || return 0 + + # Validate values are numeric + [[ "$kill_count" =~ ^[0-9]+$ ]] || return 0 + [[ "$last_kill_time" =~ ^[0-9]+$ ]] || return 0 + + local now + now=$(date +%s) + local elapsed=$((now - last_kill_time)) + + # Reset if enough time has passed since last kill + if [[ "$elapsed" -gt "$MAX_BACKOFF" ]]; then + rm -f "$BACKOFF_FILE" + return 0 + fi + + # Calculate required backoff: 5 * 2^(kill_count-1), capped at MAX_BACKOFF + local backoff=5 + local i + for ((i = 1; i < kill_count && backoff < MAX_BACKOFF; i++)); do + backoff=$((backoff * 2)) + done + if [[ "$backoff" -gt "$MAX_BACKOFF" ]]; then + backoff="$MAX_BACKOFF" + fi + + if [[ "$elapsed" -lt "$backoff" ]]; then + local remaining=$((backoff - elapsed)) + # Return empty output (no diagnostics) instead of blocking + # This prevents the language server from hanging while still + # protecting against the kill-respawn-grow cycle + echo '{"comments":[]}' + return 1 + fi + + return 0 +} + +# Record that a kill happened (called by the watchdog). +# Uses mkdir as an atomic lock to prevent race conditions when multiple +# wrapper instances kill concurrently — without the lock, the read-modify-write +# on $BACKOFF_FILE could produce an incorrect kill_count and shorter backoff. +_record_kill() { + mkdir -p "$BACKOFF_DIR" || return 0 + local lock_dir="${BACKOFF_DIR}/shellcheck.lock" + + # mkdir is atomic — if it fails, another process holds the lock + if ! mkdir "$lock_dir" 2>/dev/null; then + # Another process is updating; skip this increment (safe — the other + # process will record its own kill, so the count stays approximately correct) + return 0 + fi + + # Ensure lock is removed on function exit (including errors) + # shellcheck disable=SC2064 + trap "rmdir '$lock_dir' 2>/dev/null || true" RETURN + + local kill_count=0 + if [[ -f "$BACKOFF_FILE" ]]; then + read -r kill_count _ <"$BACKOFF_FILE" 2>/dev/null || kill_count=0 + [[ "$kill_count" =~ ^[0-9]+$ ]] || kill_count=0 + fi + + kill_count=$((kill_count + 1)) + printf '%s %s\n' "$kill_count" "$(date +%s)" >"$BACKOFF_FILE" + return 0 +} + +# --- RSS watchdog --- +# Runs as a background process, polling the child's RSS every WATCHDOG_INTERVAL +# seconds. Kills the child if RSS exceeds RSS_LIMIT_MB. +# Also enforces a hard timeout. +# +# This replaces ulimit -v which is broken on macOS ARM (Apple Silicon): +# $ ulimit -v 2097152 +# zsh:ulimit:2: setrlimit failed: invalid argument +# macOS ARM kernels don't support RLIMIT_AS (virtual memory limit). +# The watchdog approach is more reliable: it checks actual RSS (physical memory) +# rather than virtual memory, and works on all platforms. +_start_watchdog() { + local child_pid="$1" + local rss_limit_kb=$((RSS_LIMIT_MB * 1024)) + local start_time + start_time=$(date +%s) + + while kill -0 "$child_pid" 2>/dev/null; do + sleep "$WATCHDOG_INTERVAL" + + # Check if child still exists + if ! kill -0 "$child_pid" 2>/dev/null; then + break + fi + + # Get RSS in KB (macOS ps reports in KB by default) + local rss_kb + rss_kb=$(ps -o rss= -p "$child_pid" 2>/dev/null | tr -d ' ') || break + [[ "$rss_kb" =~ ^[0-9]+$ ]] || continue + + # Check RSS limit + if [[ "$rss_kb" -gt "$rss_limit_kb" ]]; then + local rss_mb=$((rss_kb / 1024)) + echo "shellcheck-wrapper: WATCHDOG: killing PID ${child_pid} — RSS ${rss_mb} MB exceeds ${RSS_LIMIT_MB} MB limit" >&2 + kill -KILL "$child_pid" 2>/dev/null || true + _record_kill + break + fi + + # Check hard timeout + local now + now=$(date +%s) + local elapsed=$((now - start_time)) + if [[ "$elapsed" -gt "$HARD_TIMEOUT" ]]; then + echo "shellcheck-wrapper: WATCHDOG: killing PID ${child_pid} — exceeded ${HARD_TIMEOUT}s timeout" >&2 + kill -KILL "$child_pid" 2>/dev/null || true + _record_kill + break + fi + done +} + # --- Main --- +# Entry point: find real shellcheck, filter args, check rate limit, run with watchdog main() { local real_shellcheck real_shellcheck="$(_find_real_shellcheck)" || exit 1 @@ -93,13 +267,32 @@ main() { filtered_args+=("$arg") done < <(_filter_args "$@") - # Enforce memory limit (soft limit — ShellCheck can still be killed by the - # memory pressure monitor if it exceeds this, but this prevents the worst case) - local vmem_mb="${SHELLCHECK_VMEM_MB:-2048}" - local vmem_kb=$((vmem_mb * 1024)) - ulimit -v "$vmem_kb" 2>/dev/null || true + # Check respawn rate limit — if we were recently killed, return empty + # results instead of running (prevents kill-respawn-grow cycle) + if ! _check_rate_limit; then + exit 0 + fi + + # Try ulimit -v as a first layer (works on Linux, no-op on macOS ARM) + ulimit -v $((RSS_LIMIT_MB * 1024)) 2>/dev/null || true + + # Run shellcheck in background with RSS watchdog + "$real_shellcheck" "${filtered_args[@]}" & + local sc_pid=$! + + # Start watchdog in background + _start_watchdog "$sc_pid" & + local wd_pid=$! + + # Wait for shellcheck to finish (or be killed by watchdog) + wait "$sc_pid" 2>/dev/null + local sc_exit=$? + + # Clean up watchdog + kill "$wd_pid" 2>/dev/null || true + wait "$wd_pid" 2>/dev/null || true - exec "$real_shellcheck" "${filtered_args[@]}" + exit "$sc_exit" } main "$@" diff --git a/setup-modules/shell-env.sh b/setup-modules/shell-env.sh index e20c37637..9eaeaac27 100644 --- a/setup-modules/shell-env.sh +++ b/setup-modules/shell-env.sh @@ -9,6 +9,7 @@ IFS=$'\n\t' trap 'rc=$?; echo "[ERROR] ${BASH_SOURCE[0]}:${LINENO} exit $rc" >&2' ERR shopt -s inherit_errexit 2>/dev/null || true +# Detect the shell currently executing this script (zsh, bash, or fallback) detect_running_shell() { if [[ -n "${ZSH_VERSION:-}" ]]; then echo "zsh" @@ -20,6 +21,7 @@ detect_running_shell() { return 0 } +# Detect the user's default login shell from $SHELL detect_default_shell() { basename "${SHELL:-/bin/bash}" return 0 @@ -61,6 +63,7 @@ get_shell_rc() { return 0 } +# Return all relevant shell rc file paths for the current platform get_all_shell_rcs() { local rcs=() @@ -88,6 +91,7 @@ get_all_shell_rcs() { return 0 } +# Offer to install Oh My Zsh if zsh is the default shell and OMZ is not present setup_oh_my_zsh() { # Only relevant if zsh is available if ! command -v zsh >/dev/null 2>&1; then @@ -155,6 +159,7 @@ setup_oh_my_zsh() { return 0 } +# Extract portable customizations from bash configs into a shared profile for cross-shell use setup_shell_compatibility() { print_info "Setting up cross-shell compatibility..." @@ -375,6 +380,7 @@ setup_shell_compatibility() { return 0 } +# Check for optional dependencies (sshpass) and offer to install them check_optional_deps() { print_info "Checking optional dependencies..." @@ -411,6 +417,7 @@ check_optional_deps() { return 0 } +# Add ~/.local/bin to PATH in all shell rc files for the aidevops CLI add_local_bin_to_path() { # shellcheck disable=SC2016 # path_line is written to rc files; must expand at shell startup, not now local path_line='export PATH="$HOME/.local/bin:$PATH"' @@ -466,7 +473,8 @@ add_local_bin_to_path() { # # The bash language server hardcodes --external-sources in every ShellCheck # invocation, causing exponential memory growth (9+ GB) when source chains -# span 463+ scripts. The wrapper strips --external-sources and enforces ulimit. +# span 463+ scripts. The wrapper strips --external-sources and enforces a +# background RSS watchdog (ulimit -v is broken on macOS ARM — EINVAL). # # GH#2915 set SHELLCHECK_PATH env var, but bash-language-server ignores it — # it resolves `shellcheck` via PATH lookup, finding /opt/homebrew/bin/shellcheck @@ -482,6 +490,11 @@ add_local_bin_to_path() { # "shellcheck". By placing ~/.aidevops/bin first on PATH with a symlink to # the wrapper, the language server finds the wrapper instead of the real binary. # Layers 1-3 are retained for tools that honour SHELLCHECK_PATH. +# +# CRITICAL: ~/.aidevops/bin MUST be at the START of PATH, not the end. +# If it appears after /opt/homebrew/bin, the real shellcheck is found first +# and the wrapper is bypassed entirely. The launchctl setenv always prepends, +# and the case-guard in shell rc files ensures it stays first. setup_shellcheck_wrapper() { local wrapper_path="$HOME/.aidevops/agents/scripts/shellcheck-wrapper.sh" @@ -505,13 +518,16 @@ setup_shellcheck_wrapper() { # shellcheck disable=SC2016 # env_line is written to rc files; must expand at shell startup env_line='export SHELLCHECK_PATH="$HOME/.aidevops/agents/scripts/shellcheck-wrapper.sh"' # shellcheck disable=SC2016 # path_line is written to rc files; must expand at shell startup - # Use case guard so repeated sourcing (e.g. nested shells) doesn't duplicate PATH entries - local path_line='case ":$PATH:" in *":$HOME/.aidevops/bin:"*) ;; *) export PATH="$HOME/.aidevops/bin:$PATH" ;; esac' + # Sanitize-and-prepend: strip any existing occurrence of the shim dir from PATH + # (it may be at the END from a previous setup run), then prepend it. This ensures + # the shim is always first, even on machines upgrading from the old append form. + # The ${PATH:+:$PATH} guard handles the empty-PATH edge case without a trailing colon. + local path_line='_aidevops_shim="$HOME/.aidevops/bin"; PATH="$(printf '\''%s'\'' "$PATH" | tr '\'':'\'' '\''\n'\'' | grep -Fxv -- "$_aidevops_shim" | paste -sd: -)"; export PATH="$_aidevops_shim${PATH:+:$PATH}"; unset _aidevops_shim' # Fish shell uses different syntax (set -gx instead of export) # shellcheck disable=SC2016 # fish lines are written to config.fish; must expand at shell startup local env_line_fish='set -gx SHELLCHECK_PATH "$HOME/.aidevops/agents/scripts/shellcheck-wrapper.sh"' - # shellcheck disable=SC2016 - local path_line_fish='contains -- "$HOME/.aidevops/bin" $PATH; or set -gx PATH "$HOME/.aidevops/bin" $PATH' + # shellcheck disable=SC2016 # fish path line: strip existing, then prepend + local path_line_fish='set -l _aidevops_shim "$HOME/.aidevops/bin"; set -l _aidevops_rest (string match -v -- "$_aidevops_shim" $PATH); set -gx PATH $_aidevops_shim $_aidevops_rest' local added_to="" local already_in="" @@ -549,14 +565,29 @@ setup_shellcheck_wrapper() { # Note: 2>/dev/null on launchctl is intentional — launchctl may not be # available in non-GUI contexts (SSH, containers). Unlike grep where we # want errors visible, launchctl failure is a non-fatal fallback. + # + # CRITICAL: Always prepend shim_dir even if it's already in PATH — it may + # be at the END (e.g., appended by a previous setup run), which means the + # real shellcheck at /opt/homebrew/bin is found first. We strip any existing + # occurrence and prepend to guarantee first position. if [[ "$PLATFORM_MACOS" == "true" ]]; then if launchctl setenv SHELLCHECK_PATH "$wrapper_path" 2>/dev/null; then print_info "Set SHELLCHECK_PATH via launchctl (GUI processes)" fi - if [[ ":$PATH:" != *":$shim_dir:"* ]]; then - if launchctl setenv PATH "$shim_dir:$PATH" 2>/dev/null; then - print_info "Prepended $shim_dir to PATH via launchctl (GUI processes)" - fi + # Build a clean PATH with shim_dir at the front, removing any existing + # occurrence to prevent duplicates while ensuring first position. + # Handle the empty-PATH edge case to avoid a trailing colon (which + # resolves to "." and is a PATH injection vector). + local clean_path + clean_path=$(printf '%s' "$PATH" | tr ':' '\n' | grep -Fxv "$shim_dir" | tr '\n' ':' | sed 's/:$//') + local new_path + if [[ -n "$clean_path" ]]; then + new_path="${shim_dir}:${clean_path}" + else + new_path="${shim_dir}" + fi + if launchctl setenv PATH "$new_path" 2>/dev/null; then + print_info "Prepended $shim_dir to PATH via launchctl (GUI processes)" fi fi @@ -582,8 +613,17 @@ setup_shellcheck_wrapper() { fi # PATH prepend for ~/.aidevops/bin (GH#2993: shim must be on PATH) - # Use exact-line match so stale entries (append-form, comments) don't short-circuit - if ! grep -Fq "$path_line" "$zshenv"; then + # Remove stale old-form entries (case guard that only checked presence, + # not position — left the shim at the end of PATH on upgrades) + # shellcheck disable=SC2016 # Matching literal $PATH text in rc files, not expanding + if grep -q 'case ":$PATH:" in.*\.aidevops/bin' "$zshenv"; then + # Remove the old case-guard line (sed is appropriate here — targeted single-line removal) + # shellcheck disable=SC2016 + sed -i.bak '/case ":$PATH:" in.*\.aidevops\/bin/d' "$zshenv" + rm -f "${zshenv}.bak" + fi + # Use exact-line match for the new sanitize-and-prepend form + if ! grep -Fq '_aidevops_shim' "$zshenv"; then { echo "" echo "# Added by aidevops setup (GH#2993: shellcheck shim on PATH)" @@ -632,8 +672,21 @@ setup_shellcheck_wrapper() { fi # PATH prepend for ~/.aidevops/bin (GH#2993) - # Use exact-line match so stale entries don't short-circuit - if ! grep -Fq "$rc_path_line" "$rc_file"; then + # Remove stale old-form entries (case guard that only checked presence, + # not position — left the shim at the end of PATH on upgrades) + # shellcheck disable=SC2016 # Matching literal $PATH text in rc files, not expanding + if grep -q 'case ":$PATH:" in.*\.aidevops/bin' "$rc_file"; then + # shellcheck disable=SC2016 + sed -i.bak '/case ":$PATH:" in.*\.aidevops\/bin/d' "$rc_file" + rm -f "${rc_file}.bak" + fi + # For fish: also remove old 'contains' form that only checked presence + if [[ "$is_fish_rc" == "true" ]] && grep -q 'contains.*\.aidevops/bin' "$rc_file"; then + sed -i.bak '/contains.*\.aidevops\/bin/d' "$rc_file" + rm -f "${rc_file}.bak" + fi + # Check for the new sanitize-and-prepend form (uses _aidevops_shim variable) + if ! grep -Fq '_aidevops_shim' "$rc_file"; then { echo "" echo "# Added by aidevops setup (GH#2993: shellcheck shim on PATH)" @@ -662,6 +715,7 @@ setup_shellcheck_wrapper() { return 0 } +# Add server access aliases to shell rc files (bash/zsh/fish) setup_aliases() { print_info "Setting up shell aliases..." @@ -765,6 +819,7 @@ ALIASES return 0 } +# Install terminal title integration that syncs tab titles with git repo/branch setup_terminal_title() { print_info "Setting up terminal title integration..."