diff --git a/.agents/scripts/commands/pulse.md b/.agents/scripts/commands/pulse.md index a5d5787e0e..3f74da2b40 100644 --- a/.agents/scripts/commands/pulse.md +++ b/.agents/scripts/commands/pulse.md @@ -541,15 +541,20 @@ For each dispatchable issue: RUNNER_USER=$(gh api user --jq '.login' 2>/dev/null || whoami) gh issue edit --repo --add-assignee "$RUNNER_USER" --add-label "status:queued" --remove-label "status:available" 2>/dev/null || true -opencode run --dir --title "Issue #: " \ - "/full-loop Implement issue #<number> (<url>) -- <brief description>" & +~/.aidevops/agents/scripts/headless-runtime-helper.sh run \ + --role worker \ + --session-key "issue-<number>" \ + --dir <path> \ + --title "Issue #<number>: <title>" \ + --prompt "/full-loop Implement issue #<number> (<url>) -- <brief description>" & sleep 2 ``` **Dispatch rules:** -- ALWAYS use `opencode run` — NEVER `claude` or `claude -p` +- ALWAYS use `~/.aidevops/agents/scripts/headless-runtime-helper.sh run` for headless dispatches — NEVER `claude`, `claude -p`, or raw `opencode run` - Background with `&`, sleep 2 between dispatches -- Do NOT add `--model` for first attempts — let `/full-loop` use its default. Bundle presets (t1364.6) handle per-project model defaults automatically. **Exception:** when escalating after 2+ failed attempts on the same issue, add `--model anthropic/claude-opus-4-6` (see "Model escalation after repeated failures" above). +- The helper alternates the default headless providers/models (`anthropic/claude-sonnet-4-6`, `openai/gpt-5.3-codex`), persists session IDs per provider + session key, honors provider backoff, and rejects `opencode/*` gateway models (no Zen fallback for headless runs) +- Do NOT add `--model` for first attempts — let the helper choose the alternating default. **Exception:** when escalating after 2+ failed attempts on the same issue, pass `--model anthropic/claude-opus-4-6` to the helper (see "Model escalation after repeated failures" above). - Use `--dir <path>` from repos.json - Route non-code tasks with `--agent`: SEO, Content, Marketing, Business, Research (see AGENTS.md "Agent Routing") - **Bundle-aware agent routing (t1364.6):** Before dispatching, check if the target repo has a bundle with `agent_routing` overrides. Run `bundle-helper.sh get agent_routing <repo-path>` — if the task domain (code, seo, content, marketing) has a non-default agent, use `--agent <name>`. Example: a content-site bundle routes `marketing` tasks to the Marketing agent instead of Build+. Explicit `--agent` flags in the issue body always override bundle defaults. @@ -567,8 +572,12 @@ sleep 2 # Build lineage block (see headless-dispatch.md for full assembly) # Or use: LINEAGE_BLOCK=$(task-decompose-helper.sh format-lineage "$TASK_ID") - opencode run --dir <path> --title "Issue #<number>: <title>" \ - "/full-loop Implement issue #<number> (<url>) -- <brief description> + ~/.aidevops/agents/scripts/headless-runtime-helper.sh run \ + --role worker \ + --session-key "issue-<number>" \ + --dir <path> \ + --title "Issue #<number>: <title>" \ + --prompt "/full-loop Implement issue #<number> (<url>) -- <brief description> TASK LINEAGE: 0. [parent] ${PARENT_DESC} (${PARENT_ID}) @@ -615,8 +624,12 @@ NEXT_BATCH=$(batch-strategy-helper.sh next-batch \ echo "$NEXT_BATCH" | jq -r '.[]' | while read -r task_id; do # Look up the issue number and repo for this task_id # Then dispatch as normal (see dispatch rules above) - opencode run --dir <path> --title "Issue #<number>: <title>" \ - "/full-loop Implement issue #<number> (<url>) -- <brief description>" & + ~/.aidevops/agents/scripts/headless-runtime-helper.sh run \ + --role worker \ + --session-key "task-${task_id}" \ + --dir <path> \ + --title "Issue #<number>: <title>" \ + --prompt "/full-loop Implement issue #<number> (<url>) -- <brief description>" & sleep 2 done ``` @@ -894,15 +907,23 @@ For each feature in the current milestone with status `pending`: ```bash # Full mode — standard worktree + PR workflow -opencode run --dir <repo_path> --title "Mission <mission_id> - <feature_title>" \ - "/full-loop Implement <task_id> -- <feature_description>. Mission context: <mission_goal>. Milestone: <milestone_name>." & +~/.aidevops/agents/scripts/headless-runtime-helper.sh run \ + --role worker \ + --session-key "mission-<mission_id>-<task_id>" \ + --dir <repo_path> \ + --title "Mission <mission_id> - <feature_title>" \ + --prompt "/full-loop Implement <task_id> -- <feature_description>. Mission context: <mission_goal>. Milestone: <milestone_name>." & sleep 2 ``` ```bash # POC mode — commit directly, skip ceremony -opencode run --dir <repo_path> --title "Mission <mission_id> - <feature_title>" \ - "/full-loop --poc <feature_description>. Mission context: <mission_goal>." & +~/.aidevops/agents/scripts/headless-runtime-helper.sh run \ + --role worker \ + --session-key "mission-<mission_id>-<feature_title>" \ + --dir <repo_path> \ + --title "Mission <mission_id> - <feature_title>" \ + --prompt "/full-loop --poc <feature_description>. Mission context: <mission_goal>." & sleep 2 ``` @@ -910,8 +931,12 @@ sleep 2 ```bash # Mission dispatch with lineage — milestone as parent, features as siblings - opencode run --dir <repo_path> --title "Mission <mission_id> - <feature_title>" \ - "/full-loop Implement <task_id> -- <feature_description>. Mission context: <mission_goal>. + ~/.aidevops/agents/scripts/headless-runtime-helper.sh run \ + --role worker \ + --session-key "mission-<mission_id>-<task_id>" \ + --dir <repo_path> \ + --title "Mission <mission_id> - <feature_title>" \ + --prompt "/full-loop Implement <task_id> -- <feature_description>. Mission context: <mission_goal>. TASK LINEAGE: 0. [milestone] <milestone_name>: <milestone_description> (mission:<mission_id>) @@ -940,8 +965,12 @@ If ALL features in the current milestone have status `completed` (merged PRs exi - Dispatch a validation worker using the milestone's validation criteria: ```bash -opencode run --dir <repo_path> --title "Mission <mission_id> - Validate Milestone <N>" \ - "/full-loop Validate milestone <N> of mission <mission_id>. Validation criteria: <criteria>. Run tests, check build, verify integration. Update mission state file at <path> with pass/fail result." & +~/.aidevops/agents/scripts/headless-runtime-helper.sh run \ + --role worker \ + --session-key "mission-<mission_id>-validate-<N>" \ + --dir <repo_path> \ + --title "Mission <mission_id> - Validate Milestone <N>" \ + --prompt "/full-loop Validate milestone <N> of mission <mission_id>. Validation criteria: <criteria>. Run tests, check build, verify integration. Update mission state file at <path> with pass/fail result." & sleep 2 ``` diff --git a/.agents/scripts/headless-runtime-helper.sh b/.agents/scripts/headless-runtime-helper.sh new file mode 100755 index 0000000000..39d46ec422 --- /dev/null +++ b/.agents/scripts/headless-runtime-helper.sh @@ -0,0 +1,769 @@ +#!/usr/bin/env bash + +# headless-runtime-helper.sh - Provider-aware OpenCode wrapper for pulse/workers +# +# Features: +# - Alternates between configured headless providers/models +# - Persists OpenCode session IDs per provider + session key +# - Records provider backoff state on auth/rate-limit/runtime failures +# - Clears backoff automatically when auth changes or retry windows expire +# - Rejects opencode/* gateway models for headless runs (no Zen fallback) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" || exit +# shellcheck source=shared-constants.sh +source "${SCRIPT_DIR}/shared-constants.sh" + +readonly DEFAULT_HEADLESS_MODELS="anthropic/claude-sonnet-4-6,openai/gpt-5.3-codex" +readonly STATE_DIR="${AIDEVOPS_HEADLESS_RUNTIME_DIR:-${HOME}/.aidevops/.agent-workspace/headless-runtime}" +readonly STATE_DB="${STATE_DIR}/state.db" +readonly OPENCODE_BIN_DEFAULT="${OPENCODE_BIN:-opencode}" + +init_state_db() { + mkdir -p "$STATE_DIR" 2>/dev/null || true + sqlite3 "$STATE_DB" <<'SQL' >/dev/null 2>&1 +PRAGMA journal_mode=WAL; +PRAGMA busy_timeout=5000; + +CREATE TABLE IF NOT EXISTS provider_backoff ( + provider TEXT PRIMARY KEY, + reason TEXT NOT NULL, + retry_after TEXT DEFAULT '', + auth_signature TEXT DEFAULT '', + details TEXT DEFAULT '', + updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')) +); + +CREATE TABLE IF NOT EXISTS provider_sessions ( + provider TEXT NOT NULL, + session_key TEXT NOT NULL, + session_id TEXT NOT NULL, + model TEXT NOT NULL, + updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')), + PRIMARY KEY (provider, session_key) +); + +CREATE TABLE IF NOT EXISTS provider_rotation ( + role TEXT PRIMARY KEY, + last_provider TEXT NOT NULL, + updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')) +); +SQL + return 0 +} + +db_query() { + local query="$1" + sqlite3 -cmd ".timeout 5000" "$STATE_DB" "$query" 2>/dev/null + return $? +} + +sql_escape() { + local value="$1" + printf '%s' "${value//\'/\'\'}" + return 0 +} + +trim_spaces() { + local value="$1" + value="${value#"${value%%[![:space:]]*}"}" + value="${value%"${value##*[![:space:]]}"}" + printf '%s' "$value" + return 0 +} + +extract_provider() { + local model="$1" + if [[ "$model" == */* ]]; then + printf '%s' "${model%%/*}" + return 0 + fi + return 1 +} + +provider_signature_override_var() { + local provider="$1" + case "$provider" in + anthropic) printf '%s' "AIDEVOPS_HEADLESS_AUTH_SIGNATURE_ANTHROPIC" ;; + openai) printf '%s' "AIDEVOPS_HEADLESS_AUTH_SIGNATURE_OPENAI" ;; + *) printf '%s' "" ;; + esac + return 0 +} + +sha256_text() { + local value="$1" + if command -v shasum >/dev/null 2>&1; then + printf '%s' "$value" | shasum -a 256 | awk '{print $1}' + return 0 + fi + if command -v sha256sum >/dev/null 2>&1; then + printf '%s' "$value" | sha256sum | awk '{print $1}' + return 0 + fi + printf '%s' "$value" + return 0 +} + +file_mtime() { + local path="$1" + if [[ ! -e "$path" ]]; then + printf '%s' "missing" + return 0 + fi + stat -f '%m' "$path" 2>/dev/null || stat -c '%Y' "$path" 2>/dev/null || printf '%s' "unknown" + return 0 +} + +get_auth_signature() { + local provider="$1" + local override_var + override_var=$(provider_signature_override_var "$provider") + if [[ -n "$override_var" && -n "${!override_var:-}" ]]; then + printf '%s' "${!override_var}" + return 0 + fi + + local auth_material="provider=${provider}" + case "$provider" in + anthropic) + local auth_status auth_file auth_mtime + auth_status=$(timeout_sec 10 "$OPENCODE_BIN_DEFAULT" auth status 2>/dev/null || true) + auth_file="${HOME}/.local/share/opencode/auth.json" + auth_mtime=$(file_mtime "$auth_file") + auth_material="${auth_material}|status=${auth_status}|mtime=${auth_mtime}" + ;; + openai) + if [[ -n "${OPENAI_API_KEY:-}" ]]; then + auth_material="${auth_material}|env=$(sha256_text "$OPENAI_API_KEY")" + else + local auth_status + auth_status=$(timeout_sec 10 "$OPENCODE_BIN_DEFAULT" auth status 2>/dev/null || true) + auth_material="${auth_material}|status=${auth_status}|env=missing" + fi + ;; + *) + auth_material="${auth_material}|unknown=true" + ;; + esac + + sha256_text "$auth_material" + return 0 +} + +get_configured_models() { + local configured="${AIDEVOPS_HEADLESS_MODELS:-$DEFAULT_HEADLESS_MODELS}" + local allowlist_raw="${AIDEVOPS_HEADLESS_PROVIDER_ALLOWLIST:-}" + local -a allowlist=() + local -a raw_models=() + local -a models=() + local item provider + + if [[ -n "$allowlist_raw" ]]; then + IFS=',' read -r -a allowlist <<<"$allowlist_raw" + fi + + IFS=',' read -r -a raw_models <<<"$configured" + for item in "${raw_models[@]}"; do + item=$(trim_spaces "$item") + [[ -z "$item" ]] && continue + if [[ "$item" == opencode/* ]]; then + continue + fi + provider=$(extract_provider "$item" 2>/dev/null || printf '%s' "") + [[ -z "$provider" ]] && continue + if [[ ${#allowlist[@]} -gt 0 ]]; then + local allowed=false + local allowed_provider + for allowed_provider in "${allowlist[@]}"; do + allowed_provider=$(trim_spaces "$allowed_provider") + if [[ "$allowed_provider" == "$provider" ]]; then + allowed=true + break + fi + done + [[ "$allowed" == "true" ]] || continue + fi + models+=("$item") + done + + if [[ ${#models[@]} -eq 0 ]]; then + return 1 + fi + + printf '%s\n' "${models[@]}" + return 0 +} + +get_last_provider() { + local role="$1" + db_query "SELECT last_provider FROM provider_rotation WHERE role = '$(sql_escape "$role")';" + return 0 +} + +set_last_provider() { + local role="$1" + local provider="$2" + db_query " +INSERT INTO provider_rotation (role, last_provider, updated_at) +VALUES ('$(sql_escape "$role")', '$(sql_escape "$provider")', strftime('%Y-%m-%dT%H:%M:%SZ', 'now')) +ON CONFLICT(role) DO UPDATE SET + last_provider = excluded.last_provider, + updated_at = excluded.updated_at; +" >/dev/null + return 0 +} + +get_session_id() { + local provider="$1" + local session_key="$2" + db_query "SELECT session_id FROM provider_sessions WHERE provider = '$(sql_escape "$provider")' AND session_key = '$(sql_escape "$session_key")';" + return 0 +} + +store_session_id() { + local provider="$1" + local session_key="$2" + local session_id="$3" + local model="$4" + db_query " +INSERT INTO provider_sessions (provider, session_key, session_id, model, updated_at) +VALUES ( + '$(sql_escape "$provider")', + '$(sql_escape "$session_key")', + '$(sql_escape "$session_id")', + '$(sql_escape "$model")', + strftime('%Y-%m-%dT%H:%M:%SZ', 'now') +) +ON CONFLICT(provider, session_key) DO UPDATE SET + session_id = excluded.session_id, + model = excluded.model, + updated_at = excluded.updated_at; +" >/dev/null + return 0 +} + +clear_provider_backoff() { + local provider="$1" + db_query "DELETE FROM provider_backoff WHERE provider = '$(sql_escape "$provider")';" >/dev/null + return 0 +} + +parse_retry_after_seconds() { + local file_path="$1" + python3 - "$file_path" <<'PY' +import re +import sys +from pathlib import Path + +text = Path(sys.argv[1]).read_text(errors="ignore").lower() +patterns = [ + (r"retry after\s+(\d+)\s*(second|seconds|sec|secs|s)\b", 1), + (r"retry after\s+(\d+)\s*(minute|minutes|min|mins|m)\b", 60), + (r"retry after\s+(\d+)\s*(hour|hours|hr|hrs|h)\b", 3600), + (r"retry after\s+(\d+)\s*(day|days|d)\b", 86400), + (r"try again in\s+(\d+)\s*(second|seconds|sec|secs|s)\b", 1), + (r"try again in\s+(\d+)\s*(minute|minutes|min|mins|m)\b", 60), + (r"try again in\s+(\d+)\s*(hour|hours|hr|hrs|h)\b", 3600), + (r"try again in\s+(\d+)\s*(day|days|d)\b", 86400), +] +for pattern, multiplier in patterns: + match = re.search(pattern, text) + if match: + print(int(match.group(1)) * multiplier) + sys.exit(0) + +numeric = re.search(r"\b429\b", text) +if numeric: + print(900) + sys.exit(0) + +print(0) +PY + return 0 +} + +record_provider_backoff() { + local provider="$1" + local reason="$2" + local details_file="$3" + local details retry_seconds auth_signature retry_after + details=$( + python3 - "$details_file" <<'PY' +from pathlib import Path +import sys +text = Path(sys.argv[1]).read_text(errors="ignore") +text = " ".join(text.split()) +print(text[:400]) +PY + ) + auth_signature=$(get_auth_signature "$provider") + retry_seconds=$(parse_retry_after_seconds "$details_file") + if [[ "$retry_seconds" -le 0 ]]; then + case "$reason" in + rate_limit) retry_seconds=900 ;; + auth_error) retry_seconds=3600 ;; + *) retry_seconds=300 ;; + esac + fi + retry_after=$(date -u -v+"${retry_seconds}"S '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null || date -u -d "+${retry_seconds} seconds" '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null || printf '%s' "") + db_query " +INSERT INTO provider_backoff (provider, reason, retry_after, auth_signature, details, updated_at) +VALUES ( + '$(sql_escape "$provider")', + '$(sql_escape "$reason")', + '$(sql_escape "$retry_after")', + '$(sql_escape "$auth_signature")', + '$(sql_escape "$details")', + strftime('%Y-%m-%dT%H:%M:%SZ', 'now') +) +ON CONFLICT(provider) DO UPDATE SET + reason = excluded.reason, + retry_after = excluded.retry_after, + auth_signature = excluded.auth_signature, + details = excluded.details, + updated_at = excluded.updated_at; +" >/dev/null + return 0 +} + +provider_backoff_active() { + local provider="$1" + local row stored_retry_after stored_signature current_signature + row=$(db_query "SELECT reason || '|' || retry_after || '|' || auth_signature FROM provider_backoff WHERE provider = '$(sql_escape "$provider")';") + if [[ -z "$row" ]]; then + return 1 + fi + + IFS='|' read -r stored_reason stored_retry_after stored_signature <<<"$row" + current_signature=$(get_auth_signature "$provider") + if [[ -n "$stored_signature" && -n "$current_signature" && "$stored_signature" != "$current_signature" ]]; then + clear_provider_backoff "$provider" + return 1 + fi + + if [[ -n "$stored_retry_after" ]]; then + local now_epoch retry_epoch + now_epoch=$(date -u '+%s') + retry_epoch=$(date -j -u -f '%Y-%m-%dT%H:%M:%SZ' "$stored_retry_after" '+%s' 2>/dev/null || date -u -d "$stored_retry_after" '+%s' 2>/dev/null || printf '%s' "0") + if [[ "$retry_epoch" -le "$now_epoch" ]]; then + clear_provider_backoff "$provider" + return 1 + fi + fi + + return 0 +} + +classify_failure_reason() { + local file_path="$1" + local lowered + lowered=$( + python3 - "$file_path" <<'PY' +from pathlib import Path +import sys +print(Path(sys.argv[1]).read_text(errors="ignore").lower()) +PY + ) + if [[ "$lowered" == *"rate limit"* ]] || [[ "$lowered" == *"429"* ]] || [[ "$lowered" == *"too many requests"* ]]; then + printf '%s' "rate_limit" + return 0 + fi + if [[ "$lowered" == *"unauthorized"* ]] || [[ "$lowered" == *"401"* ]] || [[ "$lowered" == *"invalid api key"* ]] || [[ "$lowered" == *"authentication"* ]] || [[ "$lowered" == *"auth"* && "$lowered" == *"failed"* ]]; then + printf '%s' "auth_error" + return 0 + fi + printf '%s' "provider_error" + return 0 +} + +extract_session_id_from_output() { + local file_path="$1" + python3 - "$file_path" <<'PY' +import json +import sys +from pathlib import Path + +session_id = "" +for line in Path(sys.argv[1]).read_text(errors="ignore").splitlines(): + line = line.strip() + if not line or not line.startswith("{"): + continue + try: + obj = json.loads(line) + except Exception: + continue + if obj.get("sessionID"): + session_id = obj["sessionID"] + break + part = obj.get("part") or {} + if part.get("sessionID"): + session_id = part["sessionID"] + break +print(session_id) +PY + return 0 +} + +output_has_activity() { + local file_path="$1" + python3 - "$file_path" <<'PY' +import json +import sys +from pathlib import Path + +activity = False +for line in Path(sys.argv[1]).read_text(errors="ignore").splitlines(): + line = line.strip() + if not line or not line.startswith("{"): + continue + try: + obj = json.loads(line) + except Exception: + continue + event_type = obj.get("type", "") + if event_type in {"text", "tool", "tool-invocation", "tool-result", "step-start", "step_finish", "step-finish", "reasoning"}: + activity = True + break + +print("1" if activity else "0") +PY + return 0 +} + +choose_model() { + local role="$1" + local explicit_model="${2:-}" + local -a models=() + local provider last_provider start_index i idx current_model current_provider + + if [[ -n "$explicit_model" ]]; then + if [[ "$explicit_model" == opencode/* ]]; then + print_error "Headless runtime rejects gateway model: $explicit_model" + return 1 + fi + provider=$(extract_provider "$explicit_model" 2>/dev/null || printf '%s' "") + if [[ -z "$provider" ]]; then + print_error "Model must use provider/model format: $explicit_model" + return 1 + fi + if provider_backoff_active "$provider"; then + print_warning "$provider is currently backed off — refusing explicit model $explicit_model" + return 75 + fi + printf '%s' "$explicit_model" + return 0 + fi + + while IFS= read -r current_model; do + models+=("$current_model") + done < <(get_configured_models) + if [[ ${#models[@]} -eq 0 ]]; then + print_error "No direct provider models configured for headless runtime" + return 1 + fi + + last_provider=$(get_last_provider "$role") + start_index=0 + if [[ -n "$last_provider" ]]; then + for i in "${!models[@]}"; do + current_provider=$(extract_provider "${models[$i]}") + if [[ "$current_provider" == "$last_provider" ]]; then + start_index=$(((i + 1) % ${#models[@]})) + break + fi + done + fi + + for ((i = 0; i < ${#models[@]}; i++)); do + idx=$(((start_index + i) % ${#models[@]})) + current_model="${models[$idx]}" + current_provider=$(extract_provider "$current_model") + if provider_backoff_active "$current_provider"; then + continue + fi + set_last_provider "$role" "$current_provider" + printf '%s' "$current_model" + return 0 + done + + print_warning "All configured providers are currently backed off" + return 75 +} + +cmd_select() { + local role="worker" + local model_override="" + while [[ $# -gt 0 ]]; do + case "$1" in + --role) + role="${2:-}" + shift 2 + ;; + --model) + model_override="${2:-}" + shift 2 + ;; + *) + print_error "Unknown option for select: $1" + return 1 + ;; + esac + done + + local selected + selected=$(choose_model "$role" "$model_override") || return $? + printf '%s\n' "$selected" + return 0 +} + +cmd_backoff() { + local action="${1:-status}" + shift || true + case "$action" in + status) + db_query "SELECT provider || '|' || reason || '|' || retry_after || '|' || updated_at FROM provider_backoff ORDER BY provider;" + return 0 + ;; + clear) + local provider="${1:-}" + [[ -n "$provider" ]] || { + print_error "Usage: backoff clear <provider>" + return 1 + } + clear_provider_backoff "$provider" + return 0 + ;; + set) + local provider="${1:-}" + local reason="${2:-provider_error}" + local retry_seconds="${3:-300}" + [[ -n "$provider" ]] || { + print_error "Usage: backoff set <provider> <reason> [retry_seconds]" + return 1 + } + local tmp_file + tmp_file=$(mktemp) + printf 'manual backoff %s %s %s\n' "$provider" "$reason" "$retry_seconds" >"$tmp_file" + record_provider_backoff "$provider" "$reason" "$tmp_file" + if [[ "$retry_seconds" != "300" ]]; then + local retry_after + retry_after=$(date -u -v+"${retry_seconds}"S '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null || date -u -d "+${retry_seconds} seconds" '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null || printf '%s' "") + db_query "UPDATE provider_backoff SET retry_after = '$(sql_escape "$retry_after")' WHERE provider = '$(sql_escape "$provider")';" >/dev/null + fi + rm -f "$tmp_file" + return 0 + ;; + *) + print_error "Unknown backoff action: $action" + return 1 + ;; + esac +} + +cmd_session() { + local action="${1:-status}" + shift || true + case "$action" in + status) + db_query "SELECT provider || '|' || session_key || '|' || session_id || '|' || model || '|' || updated_at FROM provider_sessions ORDER BY provider, session_key;" + return 0 + ;; + clear) + local provider="${1:-}" + local session_key="${2:-}" + [[ -n "$provider" && -n "$session_key" ]] || { + print_error "Usage: session clear <provider> <session_key>" + return 1 + } + db_query "DELETE FROM provider_sessions WHERE provider = '$(sql_escape "$provider")' AND session_key = '$(sql_escape "$session_key")';" >/dev/null + return 0 + ;; + *) + print_error "Unknown session action: $action" + return 1 + ;; + esac +} + +cmd_run() { + local role="worker" + local session_key="" + local work_dir="" + local title="" + local prompt="" + local prompt_file="" + local model_override="" + local agent_name="" + local -a extra_args=() + + while [[ $# -gt 0 ]]; do + case "$1" in + --role) + role="${2:-}" + shift 2 + ;; + --session-key) + session_key="${2:-}" + shift 2 + ;; + --dir) + work_dir="${2:-}" + shift 2 + ;; + --title) + title="${2:-}" + shift 2 + ;; + --prompt) + prompt="${2:-}" + shift 2 + ;; + --prompt-file) + prompt_file="${2:-}" + shift 2 + ;; + --model) + model_override="${2:-}" + shift 2 + ;; + --agent) + agent_name="${2:-}" + shift 2 + ;; + --opencode-arg) + extra_args+=("${2:-}") + shift 2 + ;; + *) + print_error "Unknown option for run: $1" + return 1 + ;; + esac + done + + [[ -n "$session_key" ]] || { + print_error "run requires --session-key" + return 1 + } + [[ -n "$work_dir" ]] || { + print_error "run requires --dir" + return 1 + } + [[ -n "$title" ]] || { + print_error "run requires --title" + return 1 + } + if [[ -z "$prompt" && -n "$prompt_file" ]]; then + [[ -f "$prompt_file" ]] || { + print_error "Prompt file not found: $prompt_file" + return 1 + } + prompt=$(<"$prompt_file") + fi + [[ -n "$prompt" ]] || { + print_error "run requires --prompt or --prompt-file" + return 1 + } + + local selected_model provider persisted_session + selected_model=$(choose_model "$role" "$model_override") || return $? + provider=$(extract_provider "$selected_model") + persisted_session=$(get_session_id "$provider" "$session_key") + + local -a cmd=("$OPENCODE_BIN_DEFAULT" run "$prompt" --dir "$work_dir" -m "$selected_model" --title "$title" --format json) + if [[ -n "$agent_name" ]]; then + cmd+=(--agent "$agent_name") + fi + if [[ -n "$persisted_session" ]]; then + cmd+=(--session "$persisted_session" --continue) + fi + if [[ ${#extra_args[@]} -gt 0 ]]; then + cmd+=("${extra_args[@]}") + fi + + local output_file + output_file=$(mktemp) + local exit_code=0 + "${cmd[@]}" 2>&1 | tee "$output_file" + exit_code=${PIPESTATUS[0]} + + local discovered_session + discovered_session=$(extract_session_id_from_output "$output_file") + local activity_detected + activity_detected=$(output_has_activity "$output_file") + if [[ "$exit_code" -eq 0 ]]; then + if [[ "$activity_detected" != "1" ]]; then + record_provider_backoff "$provider" "provider_error" "$output_file" + rm -f "$output_file" + print_warning "$provider returned exit 0 without any model activity; backing off provider" + return 75 + fi + if [[ -n "$discovered_session" ]]; then + store_session_id "$provider" "$session_key" "$discovered_session" "$selected_model" + fi + rm -f "$output_file" + return 0 + fi + + local failure_reason + failure_reason=$(classify_failure_reason "$output_file") + record_provider_backoff "$provider" "$failure_reason" "$output_file" + rm -f "$output_file" + return "$exit_code" +} + +show_help() { + cat <<'EOF' +headless-runtime-helper.sh - Provider-aware headless OpenCode runtime + +Usage: + headless-runtime-helper.sh select [--role pulse|worker] [--model provider/model] + headless-runtime-helper.sh run --role pulse|worker --session-key KEY --dir PATH --title TITLE (--prompt TEXT | --prompt-file FILE) [--model provider/model] [--agent NAME] [--opencode-arg ARG] + headless-runtime-helper.sh backoff [status|set PROVIDER REASON [SECONDS]|clear PROVIDER] + headless-runtime-helper.sh session [status|clear PROVIDER SESSION_KEY] + headless-runtime-helper.sh help + +Defaults: + AIDEVOPS_HEADLESS_MODELS defaults to anthropic/claude-sonnet-4-6,openai/gpt-5.3-codex + AIDEVOPS_HEADLESS_PROVIDER_ALLOWLIST can restrict selection to providers like: openai + Gateway models (opencode/*) are rejected for headless runs. +EOF + return 0 +} + +main() { + local command="${1:-help}" + shift || true + init_state_db + case "$command" in + select) + cmd_select "$@" + return $? + ;; + run) + cmd_run "$@" + return $? + ;; + backoff) + cmd_backoff "$@" + return $? + ;; + session) + cmd_session "$@" + return $? + ;; + help | --help | -h) + show_help + return 0 + ;; + *) + print_error "Unknown command: $command" + show_help + return 1 + ;; + esac +} + +main "$@" diff --git a/.agents/scripts/pulse-wrapper.sh b/.agents/scripts/pulse-wrapper.sh index d053ddd876..112c160ee8 100755 --- a/.agents/scripts/pulse-wrapper.sh +++ b/.agents/scripts/pulse-wrapper.sh @@ -12,6 +12,7 @@ # 5. Internal watchdog kills stuck pulses after PULSE_STALE_THRESHOLD (t1397) # 6. Self-watchdog: idle detection kills pulse when CPU drops to zero (t1398.3) # 7. Progress-based watchdog: kills if log output stalls for PULSE_PROGRESS_TIMEOUT (GH#2958) +# 8. Provider-aware pulse sessions via headless-runtime-helper.sh # # Lifecycle: launchd fires every 120s. If a pulse is still running, the # dedup check skips. run_pulse() has an internal watchdog that polls every @@ -96,10 +97,16 @@ SESSION_FLAG="${HOME}/.aidevops/logs/pulse-session.flag" STOP_FLAG="${HOME}/.aidevops/logs/pulse-session.stop" OPENCODE_BIN="${OPENCODE_BIN:-$(command -v opencode 2>/dev/null || echo "opencode")}" PULSE_DIR="${PULSE_DIR:-${HOME}/Git/aidevops}" -PULSE_MODEL="${PULSE_MODEL:-anthropic/claude-sonnet-4-6}" +PULSE_MODEL="${PULSE_MODEL:-}" +HEADLESS_RUNTIME_HELPER="${HEADLESS_RUNTIME_HELPER:-${SCRIPT_DIR}/headless-runtime-helper.sh}" REPOS_JSON="${REPOS_JSON:-${HOME}/.config/aidevops/repos.json}" STATE_FILE="${HOME}/.aidevops/logs/pulse-state.txt" +if [[ ! -x "$HEADLESS_RUNTIME_HELPER" ]]; then + printf '[pulse-wrapper] ERROR: headless runtime helper is missing or not executable: %s (SCRIPT_DIR=%s)\n' "$HEADLESS_RUNTIME_HELPER" "$SCRIPT_DIR" >&2 + exit 1 +fi + ####################################### # Ensure log directory exists ####################################### @@ -1260,12 +1267,14 @@ ${state_content} --- END PRE-FETCHED STATE ---" fi - # Run opencode in background - "$OPENCODE_BIN" run "$prompt" \ - --dir "$PULSE_DIR" \ - -m "$PULSE_MODEL" \ - --title "Supervisor Pulse" \ - >>"$LOGFILE" 2>&1 & + # Run the provider-aware headless wrapper in background. + # It alternates direct Anthropic/OpenAI models, persists pulse sessions per + # provider, and avoids opencode/* gateway models for headless runs. + local -a pulse_cmd=("$HEADLESS_RUNTIME_HELPER" run --role pulse --session-key supervisor-pulse --dir "$PULSE_DIR" --title "Supervisor Pulse" --prompt "$prompt") + if [[ -n "$PULSE_MODEL" ]]; then + pulse_cmd+=(--model "$PULSE_MODEL") + fi + "${pulse_cmd[@]}" >>"$LOGFILE" 2>&1 & local opencode_pid=$! echo "$opencode_pid" >"$PIDFILE" diff --git a/setup.sh b/setup.sh index 7678ffd744..fc1a45b24b 100755 --- a/setup.sh +++ b/setup.sh @@ -953,11 +953,28 @@ main() { # XML-escape paths for safe plist embedding (prevents injection # if $HOME or paths contain &, <, > characters) local _xml_wrapper_script _xml_home _xml_opencode_bin _xml_aidevops_dir _xml_path + local _headless_xml_env="" _xml_wrapper_script=$(_xml_escape "$wrapper_script") _xml_home=$(_xml_escape "$HOME") _xml_opencode_bin=$(_xml_escape "$opencode_bin") _xml_aidevops_dir=$(_xml_escape "$_aidevops_dir") _xml_path=$(_xml_escape "$PATH") + if [[ -n "${AIDEVOPS_HEADLESS_MODELS:-}" ]]; then + local _xml_headless_models + _xml_headless_models=$(_xml_escape "$AIDEVOPS_HEADLESS_MODELS") + _headless_xml_env+=$'\n' + _headless_xml_env+=$'\t\t<key>AIDEVOPS_HEADLESS_MODELS</key>' + _headless_xml_env+=$'\n' + _headless_xml_env+="\t\t<string>${_xml_headless_models}</string>" + fi + if [[ -n "${AIDEVOPS_HEADLESS_PROVIDER_ALLOWLIST:-}" ]]; then + local _xml_headless_allowlist + _xml_headless_allowlist=$(_xml_escape "$AIDEVOPS_HEADLESS_PROVIDER_ALLOWLIST") + _headless_xml_env+=$'\n' + _headless_xml_env+=$'\t\t<key>AIDEVOPS_HEADLESS_PROVIDER_ALLOWLIST</key>' + _headless_xml_env+=$'\n' + _headless_xml_env+="\t\t<string>${_xml_headless_allowlist}</string>" + fi # Write the plist (always regenerated to pick up config changes) cat >"$pulse_plist" <<PLIST @@ -990,6 +1007,7 @@ main() { <string>${_xml_aidevops_dir}</string> <key>PULSE_STALE_THRESHOLD</key> <string>1800</string> + ${_headless_xml_env} </dict> <key>RunAtLoad</key> <true/> @@ -1013,12 +1031,23 @@ PLIST # Remove old-style cron entries (direct opencode invocation) # Shell-escape all interpolated paths to prevent command injection # via $(…) or backticks if paths contain shell metacharacters - local _cron_aidevops_dir _cron_wrapper_script + local _cron_opencode_bin _cron_aidevops_dir _cron_wrapper_script _cron_headless_env="" + _cron_opencode_bin=$(_cron_escape "$opencode_bin") _cron_aidevops_dir=$(_cron_escape "$_aidevops_dir") _cron_wrapper_script=$(_cron_escape "$wrapper_script") + if [[ -n "${AIDEVOPS_HEADLESS_MODELS:-}" ]]; then + local _cron_headless_models + _cron_headless_models=$(_cron_escape "$AIDEVOPS_HEADLESS_MODELS") + _cron_headless_env+=" AIDEVOPS_HEADLESS_MODELS=${_cron_headless_models}" + fi + if [[ -n "${AIDEVOPS_HEADLESS_PROVIDER_ALLOWLIST:-}" ]]; then + local _cron_headless_allowlist + _cron_headless_allowlist=$(_cron_escape "$AIDEVOPS_HEADLESS_PROVIDER_ALLOWLIST") + _cron_headless_env+=" AIDEVOPS_HEADLESS_PROVIDER_ALLOWLIST=${_cron_headless_allowlist}" + fi ( crontab -l 2>/dev/null | grep -v 'aidevops: supervisor-pulse' - echo "*/2 * * * * PULSE_DIR=${_cron_aidevops_dir} /bin/bash ${_cron_wrapper_script} >> \"\$HOME/.aidevops/logs/pulse-wrapper.log\" 2>&1 # aidevops: supervisor-pulse" + echo "*/2 * * * * PATH=\"/usr/local/bin:/usr/bin:/bin\" OPENCODE_BIN=${_cron_opencode_bin} PULSE_DIR=${_cron_aidevops_dir}${_cron_headless_env} /bin/bash ${_cron_wrapper_script} >> \"\$HOME/.aidevops/logs/pulse-wrapper.log\" 2>&1 # aidevops: supervisor-pulse" ) | crontab - || true if crontab -l 2>/dev/null | grep -qF "aidevops: supervisor-pulse"; then print_info "Supervisor pulse enabled (cron, every 2 min). Disable: crontab -e and remove the supervisor-pulse line" diff --git a/tests/test-headless-runtime-helper.sh b/tests/test-headless-runtime-helper.sh new file mode 100755 index 0000000000..00710ac4e8 --- /dev/null +++ b/tests/test-headless-runtime-helper.sh @@ -0,0 +1,170 @@ +#!/usr/bin/env bash + +set -euo pipefail + +REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +HELPER="$REPO_DIR/.agents/scripts/headless-runtime-helper.sh" +VERBOSE="${1:-}" + +PASS_COUNT=0 +FAIL_COUNT=0 +TOTAL_COUNT=0 + +pass() { + local message="$1" + PASS_COUNT=$((PASS_COUNT + 1)) + TOTAL_COUNT=$((TOTAL_COUNT + 1)) + if [[ "$VERBOSE" == "--verbose" ]]; then + printf " PASS %s\n" "$message" + fi + return 0 +} + +fail() { + local message="$1" + local detail="${2:-}" + FAIL_COUNT=$((FAIL_COUNT + 1)) + TOTAL_COUNT=$((TOTAL_COUNT + 1)) + printf " FAIL %s\n" "$message" + if [[ -n "$detail" ]]; then + printf " %s\n" "$detail" + fi + return 0 +} + +section() { + local title="$1" + echo "" + printf "=== %s ===\n" "$title" + return 0 +} + +TEST_TMP_DIR=$(mktemp -d) +export AIDEVOPS_HEADLESS_RUNTIME_DIR="$TEST_TMP_DIR/runtime" +export STUB_LOG_FILE="$TEST_TMP_DIR/opencode-args.log" + +cleanup() { + rm -rf "$TEST_TMP_DIR" + return 0 +} +trap cleanup EXIT + +cat >"$TEST_TMP_DIR/opencode-stub.sh" <<'STUB' +#!/usr/bin/env bash +set -euo pipefail +printf '%s\n' "$*" >>"${STUB_LOG_FILE}" +session_id="${STUB_SESSION_ID:-ses_stub_default}" +text="${STUB_TEXT:-OK}" +emit_activity="${STUB_EMIT_ACTIVITY:-1}" +if [[ "$emit_activity" != "1" ]]; then + exit 0 +fi +cat <<JSON +{"type":"step_start","sessionID":"${session_id}","part":{"sessionID":"${session_id}"}} +{"type":"text","sessionID":"${session_id}","part":{"sessionID":"${session_id}","text":"${text}"}} +JSON +exit 0 +STUB +chmod +x "$TEST_TMP_DIR/opencode-stub.sh" +export OPENCODE_BIN="$TEST_TMP_DIR/opencode-stub.sh" + +section "Syntax" +if bash -n "$HELPER"; then + pass "bash -n" +else + fail "bash -n" "syntax error" +fi + +section "Selection Defaults" +first_model=$(bash "$HELPER" select --role worker 2>/dev/null || true) +second_model=$(bash "$HELPER" select --role worker 2>/dev/null || true) +if [[ "$first_model" == "anthropic/claude-sonnet-4-6" ]]; then + pass "first selection uses anthropic default" +else + fail "first selection uses anthropic default" "got: $first_model" +fi +if [[ "$second_model" == "openai/gpt-5.3-codex" ]]; then + pass "second selection alternates to openai" +else + fail "second selection alternates to openai" "got: $second_model" +fi + +section "Allowlist" +allowlisted_model=$(AIDEVOPS_HEADLESS_PROVIDER_ALLOWLIST=openai bash "$HELPER" select --role worker 2>/dev/null || true) +if [[ "$allowlisted_model" == "openai/gpt-5.3-codex" ]]; then + pass "openai allowlist restricts selection" +else + fail "openai allowlist restricts selection" "got: $allowlisted_model" +fi + +section "Backoff" +bash "$HELPER" backoff set anthropic rate_limit 3600 >/dev/null +post_backoff_model=$(bash "$HELPER" select --role pulse 2>/dev/null || true) +if [[ "$post_backoff_model" == "openai/gpt-5.3-codex" ]]; then + pass "backed off anthropic is skipped" +else + fail "backed off anthropic is skipped" "got: $post_backoff_model" +fi + +section "Auth Change Clears Backoff" +export AIDEVOPS_HEADLESS_AUTH_SIGNATURE_OPENAI="sig-old" +bash "$HELPER" backoff set openai auth_error 3600 >/dev/null +export AIDEVOPS_HEADLESS_PROVIDER_ALLOWLIST=openai +export AIDEVOPS_HEADLESS_AUTH_SIGNATURE_OPENAI="sig-new" +recovered_model=$(bash "$HELPER" select --role pulse 2>/dev/null || true) +if [[ "$recovered_model" == "openai/gpt-5.3-codex" ]]; then + pass "auth signature change clears backoff" +else + fail "auth signature change clears backoff" "got: $recovered_model" +fi +unset AIDEVOPS_HEADLESS_PROVIDER_ALLOWLIST + +section "Session Persistence" +export STUB_SESSION_ID="ses_openai_one" +rm -f "$STUB_LOG_FILE" +AIDEVOPS_HEADLESS_PROVIDER_ALLOWLIST=openai bash "$HELPER" run \ + --role worker \ + --session-key issue-101 \ + --dir "$REPO_DIR" \ + --title "Issue #101" \ + --prompt "Reply with exactly OK" >/dev/null +export STUB_SESSION_ID="ses_openai_two" +AIDEVOPS_HEADLESS_PROVIDER_ALLOWLIST=openai bash "$HELPER" run \ + --role worker \ + --session-key issue-101 \ + --dir "$REPO_DIR" \ + --title "Issue #101" \ + --prompt "Reply with exactly OK" >/dev/null + +if grep -q -- '--session ses_openai_one --continue' "$STUB_LOG_FILE"; then + pass "second run reuses persisted provider session" +else + fail "second run reuses persisted provider session" "logged args: $(tr '\n' ' ' <"$STUB_LOG_FILE")" +fi + +section "Zero Activity Success Is Rejected" +export STUB_EMIT_ACTIVITY="0" +if AIDEVOPS_HEADLESS_PROVIDER_ALLOWLIST=openai bash "$HELPER" run \ + --role worker \ + --session-key issue-202 \ + --dir "$REPO_DIR" \ + --title "Issue #202" \ + --prompt "Reply with exactly OK" >/dev/null 2>&1; then + fail "zero-activity success is rejected" "helper accepted a run with no model activity" +else + backoff_state=$(bash "$HELPER" backoff status 2>/dev/null || true) + if [[ "$backoff_state" == *"openai|provider_error|"* ]]; then + pass "zero-activity success is rejected" + else + fail "zero-activity success is rejected" "missing provider_error backoff state: $backoff_state" + fi +fi +unset STUB_EMIT_ACTIVITY + +echo "" +printf "Total: %d, Passed: %d, Failed: %d\n" "$TOTAL_COUNT" "$PASS_COUNT" "$FAIL_COUNT" + +if [[ "$FAIL_COUNT" -eq 0 ]]; then + exit 0 +fi +exit 1